xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a201590f66d78a0ee8e1bf4e6f9fcce39f32a809)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
18    enough exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
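/*
   A minimal usage sketch of the pattern recommended above (not part of this file's
   compiled code; M, N and the per-row nonzero estimates 5 and 2 are placeholders):
   calling both preallocation routines lets the same code run on one process or many.

     Mat            A;
     PetscErrorCode ierr;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/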
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
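/*
   A minimal sketch (same placeholder sizes as above) of selecting this type from the
   options database instead of hard-coding it; running with -mat_type aijcrl picks
   "aijcrl" inside MatSetFromOptions().

     Mat            A;
     PetscErrorCode ierr;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/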
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr);
187     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
188     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
189     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
190     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
191     rowners[0] = 0;
192     for (i=2; i<=size; i++) {
193       rowners[i] += rowners[i-1];
194     }
195     rstart = rowners[rank];
196     rend   = rowners[rank+1];
197     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
198     if (!rank) {
199       gmata = (Mat_SeqAIJ*) gmat->data;
200       /* send row lengths to all processors */
201       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
202       for (i=1; i<size; i++) {
203 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
204       }
205       /* determine the number of diagonal and off-diagonal entries in each row */
206       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
207       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
208       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
209       jj = 0;
210       for (i=0; i<m; i++) {
211 	for (j=0; j<dlens[i]; j++) {
212           if (gmata->j[jj] < rstart) ld[i]++;
213 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
214 	  jj++;
215 	}
216       }
217       /* send column indices to other processes */
218       for (i=1; i<size; i++) {
219 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
220 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
222       }
223 
224       /* send numerical values to other processes */
225       for (i=1; i<size; i++) {
226         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
227         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
228       }
229       gmataa = gmata->a;
230       gmataj = gmata->j;
231 
232     } else {
233       /* receive row lengths */
234       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
235       /* receive column indices */
236       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
237       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
238       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
239       /* determine the number of diagonal and off-diagonal entries in each row */
240       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
241       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
242       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
243       jj = 0;
244       for (i=0; i<m; i++) {
245 	for (j=0; j<dlens[i]; j++) {
246           if (gmataj[jj] < rstart) ld[i]++;
247 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
248 	  jj++;
249 	}
250       }
251       /* receive numerical values */
252       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
253       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
254     }
255     /* set preallocation */
256     for (i=0; i<m; i++) {
257       dlens[i] -= olens[i];
258     }
259     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
260     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
261 
262     for (i=0; i<m; i++) {
263       dlens[i] += olens[i];
264     }
265     cnt  = 0;
266     for (i=0; i<m; i++) {
267       row  = rstart + i;
268       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
269       cnt += dlens[i];
270     }
271     if (rank) {
272       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
273     }
274     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
275     ierr = PetscFree(rowners);CHKERRQ(ierr);
276     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
277     *inmat = mat;
278   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
279     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
280     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
281     mat   = *inmat;
282     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
283     if (!rank) {
284       /* send numerical values to other processes */
285       gmata = (Mat_SeqAIJ*) gmat->data;
286       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
287       gmataa = gmata->a;
288       for (i=1; i<size; i++) {
289         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
290         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
291       }
292       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
293     } else {
294       /* receive numerical values from process 0*/
295       nz   = Ad->nz + Ao->nz;
296       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
297       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
298     }
299     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
300     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
301     ad = Ad->a;
302     ao = Ao->a;
303     if (mat->rmap->n) {
304       i  = 0;
305       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
306       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
307     }
308     for (i=1; i<mat->rmap->n; i++) {
309       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
310       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
311     }
312     i--;
313     if (mat->rmap->n) {
314       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
315     }
316     if (rank) {
317       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
318     }
319   }
320   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
322   CHKMEMQ;
323   PetscFunctionReturn(0);
324 }
325 
326 /*
327   Local utility routine that creates a mapping from the global column
328 number to the local number in the off-diagonal part of the local
329 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
330 a slightly higher hash table cost; without it, it is not scalable (each processor
331 has an order N integer array) but is fast to access.
332 */
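/*
  A small worked example (illustrative values only): if the off-diagonal part on this
  process references global columns garray[] = {3,17,42}, the routine below records
  colmap[3] = 1, colmap[17] = 2, colmap[42] = 3 (or the same shifted-by-one values in a
  PetscTable keyed by garray[i]+1), so callers such as MatSetValues_MPIAIJ() recover the
  local column with

    col = aij->colmap[gcol] - 1;                       (array branch)
    PetscTableFind(aij->colmap,gcol+1,&col); col--;    (CTABLE branch)

  giving col = 1 for global column 17; a zero entry (or failed table lookup) yields
  col = -1, meaning the global column does not appear in the off-diagonal part.
*/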
333 #undef __FUNCT__
334 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
335 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
336 {
337   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
338   PetscErrorCode ierr;
339   PetscInt       n = aij->B->cmap->n,i;
340 
341   PetscFunctionBegin;
342   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
343 #if defined (PETSC_USE_CTABLE)
344   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
345   for (i=0; i<n; i++){
346     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
347   }
348 #else
349   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
350   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
351   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
352   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
353 #endif
354   PetscFunctionReturn(0);
355 }
356 
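/*
   The two macros below insert "value" at (row,col) into the local diagonal (A) and
   off-diagonal (B) SeqAIJ blocks respectively: a short bisection plus a linear scan
   locates the column within the row; an existing entry is added to or overwritten
   according to addv; a missing entry is skipped when the value is zero and
   ignorezeroentries is set, skipped when nonew == 1, an error when nonew == -1, and
   otherwise MatSeqXAIJReallocateAIJ() makes room and the later entries of the row are
   shifted up before the new (col,value) pair is stored.
*/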
357 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
358 { \
359     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
360     lastcol1 = col;\
361     while (high1-low1 > 5) { \
362       t = (low1+high1)/2; \
363       if (rp1[t] > col) high1 = t; \
364       else             low1  = t; \
365     } \
366       for (_i=low1; _i<high1; _i++) { \
367         if (rp1[_i] > col) break; \
368         if (rp1[_i] == col) { \
369           if (addv == ADD_VALUES) ap1[_i] += value;   \
370           else                    ap1[_i] = value; \
371           goto a_noinsert; \
372         } \
373       }  \
374       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
375       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
376       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
377       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
378       N = nrow1++ - 1; a->nz++; high1++; \
379       /* shift up all the later entries in this row */ \
380       for (ii=N; ii>=_i; ii--) { \
381         rp1[ii+1] = rp1[ii]; \
382         ap1[ii+1] = ap1[ii]; \
383       } \
384       rp1[_i] = col;  \
385       ap1[_i] = value;  \
386       a_noinsert: ; \
387       ailen[row] = nrow1; \
388 }
389 
390 
391 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
392 { \
393     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
394     lastcol2 = col;\
395     while (high2-low2 > 5) { \
396       t = (low2+high2)/2; \
397       if (rp2[t] > col) high2 = t; \
398       else             low2  = t; \
399     } \
400     for (_i=low2; _i<high2; _i++) {		\
401       if (rp2[_i] > col) break;			\
402       if (rp2[_i] == col) {			      \
403 	if (addv == ADD_VALUES) ap2[_i] += value;     \
404 	else                    ap2[_i] = value;      \
405 	goto b_noinsert;			      \
406       }						      \
407     }							      \
408     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
409     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
410     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
411     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
412     N = nrow2++ - 1; b->nz++; high2++;					\
413     /* shift up all the later entries in this row */			\
414     for (ii=N; ii>=_i; ii--) {						\
415       rp2[ii+1] = rp2[ii];						\
416       ap2[ii+1] = ap2[ii];						\
417     }									\
418     rp2[_i] = col;							\
419     ap2[_i] = value;							\
420     b_noinsert: ;								\
421     bilen[row] = nrow2;							\
422 }
423 
424 #undef __FUNCT__
425 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
426 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
427 {
428   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
429   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
430   PetscErrorCode ierr;
431   PetscInt       l,*garray = mat->garray,diag;
432 
433   PetscFunctionBegin;
434   /* code only works for square matrices A */
435 
436   /* find size of row to the left of the diagonal part */
437   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
438   row  = row - diag;
439   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
440     if (garray[b->j[b->i[row]+l]] > diag) break;
441   }
442   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
443 
444   /* diagonal part */
445   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
446 
447   /* right of diagonal part */
448   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatSetValues_MPIAIJ"
454 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscScalar    value;
458   PetscErrorCode ierr;
459   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
460   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
461   PetscBool      roworiented = aij->roworiented;
462 
463   /* Some Variables required in the macro */
464   Mat            A = aij->A;
465   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
466   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
467   MatScalar      *aa = a->a;
468   PetscBool      ignorezeroentries = a->ignorezeroentries;
469   Mat            B = aij->B;
470   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
471   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
472   MatScalar      *ba = b->a;
473 
474   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
475   PetscInt       nonew;
476   MatScalar      *ap1,*ap2;
477 
478   PetscFunctionBegin;
479   if (v) PetscValidScalarPointer(v,6);
480   for (i=0; i<m; i++) {
481     if (im[i] < 0) continue;
482 #if defined(PETSC_USE_DEBUG)
483     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
484 #endif
485     if (im[i] >= rstart && im[i] < rend) {
486       row      = im[i] - rstart;
487       lastcol1 = -1;
488       rp1      = aj + ai[row];
489       ap1      = aa + ai[row];
490       rmax1    = aimax[row];
491       nrow1    = ailen[row];
492       low1     = 0;
493       high1    = nrow1;
494       lastcol2 = -1;
495       rp2      = bj + bi[row];
496       ap2      = ba + bi[row];
497       rmax2    = bimax[row];
498       nrow2    = bilen[row];
499       low2     = 0;
500       high2    = nrow2;
501 
502       for (j=0; j<n; j++) {
503         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
504         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
505         if (in[j] >= cstart && in[j] < cend){
506           col = in[j] - cstart;
507           nonew = a->nonew;
508           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
509         } else if (in[j] < 0) continue;
510 #if defined(PETSC_USE_DEBUG)
511         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
512 #endif
513         else {
514           if (mat->was_assembled) {
515             if (!aij->colmap) {
516               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
517             }
518 #if defined (PETSC_USE_CTABLE)
519             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
520 	    col--;
521 #else
522             col = aij->colmap[in[j]] - 1;
523 #endif
524             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
525               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526               col =  in[j];
527               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
528               B = aij->B;
529               b = (Mat_SeqAIJ*)B->data;
530               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
531               rp2      = bj + bi[row];
532               ap2      = ba + bi[row];
533               rmax2    = bimax[row];
534               nrow2    = bilen[row];
535               low2     = 0;
536               high2    = nrow2;
537               bm       = aij->B->rmap->n;
538               ba = b->a;
539             }
540           } else col = in[j];
541           nonew = b->nonew;
542           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
543         }
544       }
545     } else {
546       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
547       if (!aij->donotstash) {
548         mat->assembled = PETSC_FALSE;
549         if (roworiented) {
550           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
551         } else {
552           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
553         }
554       }
555     }
556   }
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatGetValues_MPIAIJ"
562 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
563 {
564   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
565   PetscErrorCode ierr;
566   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
567   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
568 
569   PetscFunctionBegin;
570   for (i=0; i<m; i++) {
571     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
572     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
573     if (idxm[i] >= rstart && idxm[i] < rend) {
574       row = idxm[i] - rstart;
575       for (j=0; j<n; j++) {
576         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
577         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
578         if (idxn[j] >= cstart && idxn[j] < cend){
579           col = idxn[j] - cstart;
580           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
581         } else {
582           if (!aij->colmap) {
583             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
584           }
585 #if defined (PETSC_USE_CTABLE)
586           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
587           col --;
588 #else
589           col = aij->colmap[idxn[j]] - 1;
590 #endif
591           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
592           else {
593             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
594           }
595         }
596       }
597     } else {
598       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
599     }
600   }
601   PetscFunctionReturn(0);
602 }
603 
604 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
605 
606 #undef __FUNCT__
607 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
608 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
609 {
610   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
611   PetscErrorCode ierr;
612   PetscInt       nstash,reallocs;
613   InsertMode     addv;
614 
615   PetscFunctionBegin;
616   if (aij->donotstash || mat->nooffprocentries) {
617     PetscFunctionReturn(0);
618   }
619 
620   /* make sure all processors are either in INSERTMODE or ADDMODE */
621   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
622   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
623   mat->insertmode = addv; /* in case this processor had no cache */
624 
625   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
626   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
627   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
628   PetscFunctionReturn(0);
629 }
630 
631 #undef __FUNCT__
632 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
633 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
634 {
635   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
636   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
637   PetscErrorCode ierr;
638   PetscMPIInt    n;
639   PetscInt       i,j,rstart,ncols,flg;
640   PetscInt       *row,*col;
641   PetscBool      other_disassembled;
642   PetscScalar    *val;
643   InsertMode     addv = mat->insertmode;
644 
645   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
646   PetscFunctionBegin;
647   if (!aij->donotstash && !mat->nooffprocentries) {
648     while (1) {
649       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
650       if (!flg) break;
651 
652       for (i=0; i<n;) {
653         /* Now identify the consecutive vals belonging to the same row */
654         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
655         if (j < n) ncols = j-i;
656         else       ncols = n-i;
657         /* Now assemble all these values with a single function call */
658         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
659         i = j;
660       }
661     }
662     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
663   }
664   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
665   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
666 
667   /* determine if any processor has disassembled, if so we must
668      also disassemble ourselves, in order that we may reassemble. */
669   /*
670      if nonzero structure of submatrix B cannot change then we know that
671      no processor disassembled thus we can skip this stuff
672   */
673   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
674     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
675     if (mat->was_assembled && !other_disassembled) {
676       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
677     }
678   }
679   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
680     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
681   }
682   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
683   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
684   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
685   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
686 
687   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
688   aij->rowvalues = 0;
689 
690   /* used by MatAXPY() */
691   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
692   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
693 
694   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
695   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
696   PetscFunctionReturn(0);
697 }
698 
699 #undef __FUNCT__
700 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
701 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
702 {
703   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
704   PetscErrorCode ierr;
705 
706   PetscFunctionBegin;
707   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
708   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
709   PetscFunctionReturn(0);
710 }
711 
712 #undef __FUNCT__
713 #define __FUNCT__ "MatZeroRows_MPIAIJ"
714 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
715 {
716   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
717   PetscErrorCode    ierr;
718   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
719   PetscInt          i,*owners = A->rmap->range;
720   PetscInt          *nprocs,j,idx,nsends,row;
721   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
722   PetscInt          *rvalues,count,base,slen,*source;
723   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
724   MPI_Comm          comm = ((PetscObject)A)->comm;
725   MPI_Request       *send_waits,*recv_waits;
726   MPI_Status        recv_status,*send_status;
727   const PetscScalar *xx;
728   PetscScalar       *bb;
729 #if defined(PETSC_DEBUG)
730   PetscBool      found = PETSC_FALSE;
731 #endif
732 
733   PetscFunctionBegin;
734   /*  first count number of contributors to each processor */
735   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
736   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
737   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
738   j = 0;
739   for (i=0; i<N; i++) {
740     if (lastidx > (idx = rows[i])) j = 0;
741     lastidx = idx;
742     for (; j<size; j++) {
743       if (idx >= owners[j] && idx < owners[j+1]) {
744         nprocs[2*j]++;
745         nprocs[2*j+1] = 1;
746         owner[i] = j;
747 #if defined(PETSC_DEBUG)
748         found = PETSC_TRUE;
749 #endif
750         break;
751       }
752     }
753 #if defined(PETSC_DEBUG)
754     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
755     found = PETSC_FALSE;
756 #endif
757   }
758   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
759 
760   if (A->nooffproczerorows) {
761     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
762     nrecvs = nsends;
763     nmax   = N;
764   } else {
765     /* inform other processors of number of messages and max length*/
766     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
767   }
768 
769   /* post receives:   */
770   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
771   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
772   for (i=0; i<nrecvs; i++) {
773     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
774   }
775 
776   /* do sends:
777       1) starts[i] gives the starting index in svalues for stuff going to
778          the ith processor
779   */
780   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
781   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
782   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
783   starts[0] = 0;
784   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
785   for (i=0; i<N; i++) {
786     svalues[starts[owner[i]]++] = rows[i];
787   }
788 
789   starts[0] = 0;
790   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
791   count = 0;
792   for (i=0; i<size; i++) {
793     if (nprocs[2*i+1]) {
794       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
795     }
796   }
797   ierr = PetscFree(starts);CHKERRQ(ierr);
798 
799   base = owners[rank];
800 
801   /*  wait on receives */
802   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
803   count  = nrecvs; slen = 0;
804   while (count) {
805     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
806     /* unpack receives into our local space */
807     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
808     source[imdex]  = recv_status.MPI_SOURCE;
809     lens[imdex]    = n;
810     slen          += n;
811     count--;
812   }
813   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
814 
815   /* move the data into the send scatter */
816   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
817   count = 0;
818   for (i=0; i<nrecvs; i++) {
819     values = rvalues + i*nmax;
820     for (j=0; j<lens[i]; j++) {
821       lrows[count++] = values[j] - base;
822     }
823   }
824   ierr = PetscFree(rvalues);CHKERRQ(ierr);
825   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
826   ierr = PetscFree(owner);CHKERRQ(ierr);
827   ierr = PetscFree(nprocs);CHKERRQ(ierr);
828 
829   /* fix right hand side if needed */
830   if (x && b) {
831     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
832     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
833     for (i=0; i<slen; i++) {
834       bb[lrows[i]] = diag*xx[lrows[i]];
835     }
836     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
837     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
838   }
839   /*
840         Zero the required rows. If the "diagonal block" of the matrix
841      is square and the user wishes to set the diagonal we use separate
842      code so that MatSetValues() is not called for each diagonal entry, which
843      would allocate new memory with many mallocs and slow things down.
845   */
846   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
847   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
848   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
849     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
850   } else if (diag != 0.0) {
851     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
852     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
853       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
854 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
855     }
856     for (i = 0; i < slen; i++) {
857       row  = lrows[i] + rstart;
858       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
859     }
860     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
861     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
862   } else {
863     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
864   }
865   ierr = PetscFree(lrows);CHKERRQ(ierr);
866 
867   /* wait on sends */
868   if (nsends) {
869     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
870     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
871     ierr = PetscFree(send_status);CHKERRQ(ierr);
872   }
873   ierr = PetscFree(send_waits);CHKERRQ(ierr);
874   ierr = PetscFree(svalues);CHKERRQ(ierr);
875   PetscFunctionReturn(0);
876 }
877 
878 #undef __FUNCT__
879 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
880 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
881 {
882   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
883   PetscErrorCode    ierr;
884   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
885   PetscInt          i,*owners = A->rmap->range;
886   PetscInt          *nprocs,j,idx,nsends;
887   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
888   PetscInt          *rvalues,count,base,slen,*source;
889   PetscInt          *lens,*lrows,*values,m;
890   MPI_Comm          comm = ((PetscObject)A)->comm;
891   MPI_Request       *send_waits,*recv_waits;
892   MPI_Status        recv_status,*send_status;
893   const PetscScalar *xx;
894   PetscScalar       *bb,*mask;
895   Vec               xmask,lmask;
896   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
897   const PetscInt    *aj, *ii,*ridx;
898   PetscScalar       *aa;
899 #if defined(PETSC_DEBUG)
900   PetscBool         found = PETSC_FALSE;
901 #endif
902 
903   PetscFunctionBegin;
904   /*  first count number of contributors to each processor */
905   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
906   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
907   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
908   j = 0;
909   for (i=0; i<N; i++) {
910     if (lastidx > (idx = rows[i])) j = 0;
911     lastidx = idx;
912     for (; j<size; j++) {
913       if (idx >= owners[j] && idx < owners[j+1]) {
914         nprocs[2*j]++;
915         nprocs[2*j+1] = 1;
916         owner[i] = j;
917 #if defined(PETSC_DEBUG)
918         found = PETSC_TRUE;
919 #endif
920         break;
921       }
922     }
923 #if defined(PETSC_DEBUG)
924     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
925     found = PETSC_FALSE;
926 #endif
927   }
928   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
929 
930   /* inform other processors of number of messages and max length*/
931   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
932 
933   /* post receives:   */
934   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
935   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
936   for (i=0; i<nrecvs; i++) {
937     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
938   }
939 
940   /* do sends:
941       1) starts[i] gives the starting index in svalues for stuff going to
942          the ith processor
943   */
944   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
945   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
946   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
947   starts[0] = 0;
948   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
949   for (i=0; i<N; i++) {
950     svalues[starts[owner[i]]++] = rows[i];
951   }
952 
953   starts[0] = 0;
954   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
955   count = 0;
956   for (i=0; i<size; i++) {
957     if (nprocs[2*i+1]) {
958       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
959     }
960   }
961   ierr = PetscFree(starts);CHKERRQ(ierr);
962 
963   base = owners[rank];
964 
965   /*  wait on receives */
966   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
967   count  = nrecvs; slen = 0;
968   while (count) {
969     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
970     /* unpack receives into our local space */
971     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
972     source[imdex]  = recv_status.MPI_SOURCE;
973     lens[imdex]    = n;
974     slen          += n;
975     count--;
976   }
977   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
978 
979   /* move the data into the send scatter */
980   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
981   count = 0;
982   for (i=0; i<nrecvs; i++) {
983     values = rvalues + i*nmax;
984     for (j=0; j<lens[i]; j++) {
985       lrows[count++] = values[j] - base;
986     }
987   }
988   ierr = PetscFree(rvalues);CHKERRQ(ierr);
989   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
990   ierr = PetscFree(owner);CHKERRQ(ierr);
991   ierr = PetscFree(nprocs);CHKERRQ(ierr);
992   /* lrows are the local rows to be zeroed, slen is the number of local rows */
993 
994   /* zero diagonal part of matrix */
995   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
996 
997   /* handle off diagonal part of matrix */
998   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
999   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1000   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1001   for (i=0; i<slen; i++) {
1002     bb[lrows[i]] = 1;
1003   }
1004   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1005   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1007   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1008   if (x) {
1009     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1010     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1011     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1012     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1013   }
1014   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1015 
1016   /* remove zeroed rows of off diagonal matrix */
1017   ii = aij->i;
1018   for (i=0; i<slen; i++) {
1019     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1020   }
1021 
1022   /* loop over all elements of off process part of matrix zeroing removed columns*/
1023   if (aij->compressedrow.use){
1024     m    = aij->compressedrow.nrows;
1025     ii   = aij->compressedrow.i;
1026     ridx = aij->compressedrow.rindex;
1027     for (i=0; i<m; i++){
1028       n   = ii[i+1] - ii[i];
1029       aj  = aij->j + ii[i];
1030       aa  = aij->a + ii[i];
1031 
1032       for (j=0; j<n; j++) {
1033         if (PetscAbsScalar(mask[*aj])) {
1034           if (b) bb[*ridx] -= *aa*xx[*aj];
1035           *aa        = 0.0;
1036         }
1037         aa++;
1038         aj++;
1039       }
1040       ridx++;
1041     }
1042   } else { /* do not use compressed row format */
1043     m = l->B->rmap->n;
1044     for (i=0; i<m; i++) {
1045       n   = ii[i+1] - ii[i];
1046       aj  = aij->j + ii[i];
1047       aa  = aij->a + ii[i];
1048       for (j=0; j<n; j++) {
1049         if (PetscAbsScalar(mask[*aj])) {
1050           if (b) bb[i] -= *aa*xx[*aj];
1051           *aa    = 0.0;
1052         }
1053         aa++;
1054         aj++;
1055       }
1056     }
1057   }
1058   if (x) {
1059     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1060     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1061   }
1062   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1063   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1064   ierr = PetscFree(lrows);CHKERRQ(ierr);
1065 
1066   /* wait on sends */
1067   if (nsends) {
1068     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1069     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1070     ierr = PetscFree(send_status);CHKERRQ(ierr);
1071   }
1072   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1073   ierr = PetscFree(svalues);CHKERRQ(ierr);
1074 
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 #undef __FUNCT__
1079 #define __FUNCT__ "MatMult_MPIAIJ"
1080 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1081 {
1082   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1083   PetscErrorCode ierr;
1084   PetscInt       nt;
1085 
1086   PetscFunctionBegin;
1087   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1088   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1089   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1090   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1091   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1092   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 #undef __FUNCT__
1097 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1098 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1099 {
1100   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1101   PetscErrorCode ierr;
1102 
1103   PetscFunctionBegin;
1104   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 #undef __FUNCT__
1109 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1110 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1111 {
1112   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1113   PetscErrorCode ierr;
1114 
1115   PetscFunctionBegin;
1116   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1117   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1118   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1119   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1125 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1126 {
1127   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1128   PetscErrorCode ierr;
1129   PetscBool      merged;
1130 
1131   PetscFunctionBegin;
1132   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1133   /* do nondiagonal part */
1134   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1135   if (!merged) {
1136     /* send it on its way */
1137     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1138     /* do local part */
1139     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1140     /* receive remote parts: note this assumes the values are not actually */
1141   /* added in yy until the next line */
1142     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1143   } else {
1144     /* do local part */
1145     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1146     /* send it on its way */
1147     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1148     /* values actually were received in the Begin() but we need to call this nop */
1149     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1150   }
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 EXTERN_C_BEGIN
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1157 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1158 {
1159   MPI_Comm       comm;
1160   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1161   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1162   IS             Me,Notme;
1163   PetscErrorCode ierr;
1164   PetscInt       M,N,first,last,*notme,i;
1165   PetscMPIInt    size;
1166 
1167   PetscFunctionBegin;
1168 
1169   /* Easy test: symmetric diagonal block */
1170   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1171   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1172   if (!*f) PetscFunctionReturn(0);
1173   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1174   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1175   if (size == 1) PetscFunctionReturn(0);
1176 
1177   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1178   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1179   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1180   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1181   for (i=0; i<first; i++) notme[i] = i;
1182   for (i=last; i<M; i++) notme[i-last+first] = i;
1183   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1184   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1185   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1186   Aoff = Aoffs[0];
1187   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1188   Boff = Boffs[0];
1189   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1190   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1191   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1192   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1193   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1194   ierr = PetscFree(notme);CHKERRQ(ierr);
1195   PetscFunctionReturn(0);
1196 }
1197 EXTERN_C_END
1198 
1199 #undef __FUNCT__
1200 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1201 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1202 {
1203   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1204   PetscErrorCode ierr;
1205 
1206   PetscFunctionBegin;
1207   /* do nondiagonal part */
1208   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1209   /* send it on its way */
1210   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1211   /* do local part */
1212   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1213   /* receive remote parts */
1214   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 /*
1219   This only works correctly for square matrices where the subblock A->A is the
1220    diagonal block
1221 */
1222 #undef __FUNCT__
1223 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1224 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1225 {
1226   PetscErrorCode ierr;
1227   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1228 
1229   PetscFunctionBegin;
1230   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1231   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1232   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatScale_MPIAIJ"
1238 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1239 {
1240   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1241   PetscErrorCode ierr;
1242 
1243   PetscFunctionBegin;
1244   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1245   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatDestroy_MPIAIJ"
1251 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1252 {
1253   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1254   PetscErrorCode ierr;
1255 
1256   PetscFunctionBegin;
1257 #if defined(PETSC_USE_LOG)
1258   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1259 #endif
1260   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1261   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1262   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1263   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1264 #if defined (PETSC_USE_CTABLE)
1265   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1266 #else
1267   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1268 #endif
1269   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1270   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1271   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1272   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1273   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1274   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1275 
1276   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 #undef __FUNCT__
1289 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1290 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1291 {
1292   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1293   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1294   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1295   PetscErrorCode    ierr;
1296   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1297   int               fd;
1298   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1299   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1300   PetscScalar       *column_values;
1301   PetscInt          message_count,flowcontrolcount;
1302 
1303   PetscFunctionBegin;
1304   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1305   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1306   nz   = A->nz + B->nz;
1307   if (!rank) {
1308     header[0] = MAT_FILE_CLASSID;
1309     header[1] = mat->rmap->N;
1310     header[2] = mat->cmap->N;
1311     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1312     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1313     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1314     /* get largest number of rows any processor has */
1315     rlen = mat->rmap->n;
1316     range = mat->rmap->range;
1317     for (i=1; i<size; i++) {
1318       rlen = PetscMax(rlen,range[i+1] - range[i]);
1319     }
1320   } else {
1321     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1322     rlen = mat->rmap->n;
1323   }
1324 
1325   /* load up the local row counts */
1326   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1327   for (i=0; i<mat->rmap->n; i++) {
1328     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1329   }
1330 
1331   /* store the row lengths to the file */
1332   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1333   if (!rank) {
1334     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     for (i=1; i<size; i++) {
1336       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1337       rlen = range[i+1] - range[i];
1338       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1339       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1340     }
1341     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1342   } else {
1343     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1344     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1345     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1346   }
1347   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1348 
1349   /* load up the local column indices */
1350   nzmax = nz; /* 0th processor needs space as large as the largest processor needs */
1351   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1352   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1353   cnt  = 0;
1354   for (i=0; i<mat->rmap->n; i++) {
1355     for (j=B->i[i]; j<B->i[i+1]; j++) {
1356       if ( (col = garray[B->j[j]]) > cstart) break;
1357       column_indices[cnt++] = col;
1358     }
1359     for (k=A->i[i]; k<A->i[i+1]; k++) {
1360       column_indices[cnt++] = A->j[k] + cstart;
1361     }
1362     for (; j<B->i[i+1]; j++) {
1363       column_indices[cnt++] = garray[B->j[j]];
1364     }
1365   }
1366   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1367 
1368   /* store the column indices to the file */
1369   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1370   if (!rank) {
1371     MPI_Status status;
1372     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1373     for (i=1; i<size; i++) {
1374       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1375       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1376       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1377       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1378       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1379     }
1380     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1381   } else {
1382     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1383     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1384     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1385     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1386   }
1387   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1388 
1389   /* load up the local column values */
1390   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1391   cnt  = 0;
1392   for (i=0; i<mat->rmap->n; i++) {
1393     for (j=B->i[i]; j<B->i[i+1]; j++) {
1394       if ( garray[B->j[j]] > cstart) break;
1395       column_values[cnt++] = B->a[j];
1396     }
1397     for (k=A->i[i]; k<A->i[i+1]; k++) {
1398       column_values[cnt++] = A->a[k];
1399     }
1400     for (; j<B->i[i+1]; j++) {
1401       column_values[cnt++] = B->a[j];
1402     }
1403   }
1404   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1405 
1406   /* store the column values to the file */
1407   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1408   if (!rank) {
1409     MPI_Status status;
1410     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1411     for (i=1; i<size; i++) {
1412       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1413       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1414       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1415       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1416       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1417     }
1418     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1419   } else {
1420     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1421     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1422     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1423     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1424   }
1425   ierr = PetscFree(column_values);CHKERRQ(ierr);
1426   PetscFunctionReturn(0);
1427 }
1428 
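/*
   Usage sketch (illustrative only, not part of this file): MatView_MPIAIJ_Binary() above is
   reached through the public MatView() when the viewer is a parallel binary viewer.  The
   file name "matrix.dat" and the helper name are hypothetical.
*/
static PetscErrorCode ExampleDumpMatrixBinary(Mat A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
  ierr = MatView(A,viewer);CHKERRQ(ierr); /* dispatches to MatView_MPIAIJ_Binary() for a parallel MATMPIAIJ */
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}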
1429 #undef __FUNCT__
1430 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1431 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1432 {
1433   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1434   PetscErrorCode    ierr;
1435   PetscMPIInt       rank = aij->rank,size = aij->size;
1436   PetscBool         isdraw,iascii,isbinary;
1437   PetscViewer       sviewer;
1438   PetscViewerFormat format;
1439 
1440   PetscFunctionBegin;
1441   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1442   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1443   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1444   if (iascii) {
1445     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1446     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1447       MatInfo    info;
1448       PetscInt   *inodes;
1449 
1450       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1451       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1452       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,&inodes,PETSC_NULL);CHKERRQ(ierr);
1453       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1454       if (!inodes) {
1455         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1456 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1457       } else {
1458         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1459 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1460       }
1461       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1462       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1463       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1464       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1465       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1466       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1467       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1468       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1469       PetscFunctionReturn(0);
1470     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1471       PetscInt   inodecount,inodelimit,*inodes;
1472       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1473       if (inodes) {
1474         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1475       } else {
1476         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1477       }
1478       PetscFunctionReturn(0);
1479     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1480       PetscFunctionReturn(0);
1481     }
1482   } else if (isbinary) {
1483     if (size == 1) {
1484       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1485       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1486     } else {
1487       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1488     }
1489     PetscFunctionReturn(0);
1490   } else if (isdraw) {
1491     PetscDraw  draw;
1492     PetscBool  isnull;
1493     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1494     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1495   }
1496 
1497   if (size == 1) {
1498     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1499     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1500   } else {
1501     /* assemble the entire matrix onto first processor. */
1502     Mat         A;
1503     Mat_SeqAIJ  *Aloc;
1504     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1505     MatScalar   *a;
1506 
1507     if (mat->rmap->N > 1024) {
1508       PetscBool  flg = PETSC_FALSE;
1509 
1510       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1511       if (!flg) {
1512         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1513       }
1514     }
1515 
1516     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1517     if (!rank) {
1518       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1519     } else {
1520       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1521     }
1522     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1523     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1524     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1525     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1526     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1527 
1528     /* copy over the A part */
1529     Aloc = (Mat_SeqAIJ*)aij->A->data;
1530     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1531     row = mat->rmap->rstart;
1532     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1533     for (i=0; i<m; i++) {
1534       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1535       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1536     }
1537     aj = Aloc->j;
1538     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1539 
1540     /* copy over the B part */
1541     Aloc = (Mat_SeqAIJ*)aij->B->data;
1542     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1543     row  = mat->rmap->rstart;
1544     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1545     ct   = cols;
1546     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1547     for (i=0; i<m; i++) {
1548       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1549       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1550     }
1551     ierr = PetscFree(ct);CHKERRQ(ierr);
1552     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1553     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1554     /*
1555        Everyone has to call MatView() here to draw the matrix since the graphics waits are
1556        synchronized across all processes that share the PetscDraw object
1557     */
1558     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1559     if (!rank) {
1560       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1561       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1562       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1563       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1564     }
1565     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1566     ierr = MatDestroy(&A);CHKERRQ(ierr);
1567   }
1568   PetscFunctionReturn(0);
1569 }
1570 
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatView_MPIAIJ"
1573 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1574 {
1575   PetscErrorCode ierr;
1576   PetscBool      iascii,isdraw,issocket,isbinary;
1577 
1578   PetscFunctionBegin;
1579   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1580   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1581   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1582   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1583   if (iascii || isdraw || isbinary || issocket) {
1584     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1585   } else {
1586     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1587   }
1588   PetscFunctionReturn(0);
1589 }
1590 
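/*
   Usage sketch (illustrative only): requesting the per-process summary that
   MatView_MPIAIJ_ASCIIorDraworSocket() prints for the PETSC_VIEWER_ASCII_INFO_DETAIL
   format.  The helper name is hypothetical.
*/
static PetscErrorCode ExampleViewMatrixInfoDetail(Mat A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer = PETSC_VIEWER_STDOUT_(((PetscObject)A)->comm);

  PetscFunctionBegin;
  ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
  ierr = MatView(A,viewer);CHKERRQ(ierr); /* prints local rows, nz counts and the Mvctx VecScatter info */
  PetscFunctionReturn(0);
}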
1591 #undef __FUNCT__
1592 #define __FUNCT__ "MatSOR_MPIAIJ"
1593 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1594 {
1595   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1596   PetscErrorCode ierr;
1597   Vec            bb1 = 0;
1598   PetscBool      hasop;
1599 
1600   PetscFunctionBegin;
1601   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1602     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1603   }
1604 
1605   if (flag == SOR_APPLY_UPPER) {
1606     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1607     PetscFunctionReturn(0);
1608   }
1609 
1610   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1611     if (flag & SOR_ZERO_INITIAL_GUESS) {
1612       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1613       its--;
1614     }
1615 
1616     while (its--) {
1617       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1618       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1619 
1620       /* update rhs: bb1 = bb - B*x */
1621       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1622       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1623 
1624       /* local sweep */
1625       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1626     }
1627   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1628     if (flag & SOR_ZERO_INITIAL_GUESS) {
1629       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1630       its--;
1631     }
1632     while (its--) {
1633       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1634       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1635 
1636       /* update rhs: bb1 = bb - B*x */
1637       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1638       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1639 
1640       /* local sweep */
1641       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1642     }
1643   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1644     if (flag & SOR_ZERO_INITIAL_GUESS) {
1645       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1646       its--;
1647     }
1648     while (its--) {
1649       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1650       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1651 
1652       /* update rhs: bb1 = bb - B*x */
1653       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1654       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1655 
1656       /* local sweep */
1657       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1658     }
1659   }  else if (flag & SOR_EISENSTAT) {
1660     Vec         xx1;
1661 
1662     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1663     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1664 
1665     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1666     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1667     if (!mat->diag) {
1668       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1669       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1670     }
1671     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1672     if (hasop) {
1673       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1674     } else {
1675       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1676     }
1677     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1678 
1679     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1680 
1681     /* local sweep */
1682     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1683     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1684     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1685   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1686 
1687   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1688   PetscFunctionReturn(0);
1689 }
1690 
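/*
   Usage sketch (illustrative only): one application of the local symmetric SOR/Gauss-Seidel
   smoother handled by MatSOR_MPIAIJ() above.  omega = 1.0, fshift = 0.0 and the single
   inner/outer iteration counts are example values.
*/
static PetscErrorCode ExampleApplyLocalSSOR(Mat A,Vec b,Vec x)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}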
1691 #undef __FUNCT__
1692 #define __FUNCT__ "MatPermute_MPIAIJ"
1693 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1694 {
1695   MPI_Comm       comm;
1696   PetscInt       first,local_rowsize,local_colsize;
1697   const PetscInt *rows;
1698   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1699   PetscErrorCode ierr;
1700 
1701   PetscFunctionBegin;
1702   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1703   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1704   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1705   /* collect the global row permutation and invert it */
1706   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1707   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1708   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1709   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1710   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1711   /* get the local target indices */
1712   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1713   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1714   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1715   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1716   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1717   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1718   /* the column permutation is so much easier;
1719      make a local version of 'colp' and invert it */
1720   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1721   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1722   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1723   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1724   /* now we just get the submatrix */
1725   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1726   /* clean up */
1727   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1728   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1729   PetscFunctionReturn(0);
1730 }
1731 
1732 #undef __FUNCT__
1733 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1734 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1735 {
1736   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1737   Mat            A = mat->A,B = mat->B;
1738   PetscErrorCode ierr;
1739   PetscReal      isend[5],irecv[5];
1740 
1741   PetscFunctionBegin;
1742   info->block_size     = 1.0;
1743   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1744   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1745   isend[3] = info->memory;  isend[4] = info->mallocs;
1746   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1747   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1748   isend[3] += info->memory;  isend[4] += info->mallocs;
1749   if (flag == MAT_LOCAL) {
1750     info->nz_used      = isend[0];
1751     info->nz_allocated = isend[1];
1752     info->nz_unneeded  = isend[2];
1753     info->memory       = isend[3];
1754     info->mallocs      = isend[4];
1755   } else if (flag == MAT_GLOBAL_MAX) {
1756     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1757     info->nz_used      = irecv[0];
1758     info->nz_allocated = irecv[1];
1759     info->nz_unneeded  = irecv[2];
1760     info->memory       = irecv[3];
1761     info->mallocs      = irecv[4];
1762   } else if (flag == MAT_GLOBAL_SUM) {
1763     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1764     info->nz_used      = irecv[0];
1765     info->nz_allocated = irecv[1];
1766     info->nz_unneeded  = irecv[2];
1767     info->memory       = irecv[3];
1768     info->mallocs      = irecv[4];
1769   }
1770   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1771   info->fill_ratio_needed = 0;
1772   info->factor_mallocs    = 0;
1773 
1774   PetscFunctionReturn(0);
1775 }
1776 
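/*
   Usage sketch (illustrative only): querying the nonzero/memory statistics that
   MatGetInfo_MPIAIJ() reduces over the communicator; MAT_GLOBAL_MAX and MAT_LOCAL
   are the other supported flags.  The helper name is hypothetical.
*/
static PetscErrorCode ExampleReportGlobalNonzeros(Mat A)
{
  PetscErrorCode ierr;
  MatInfo        info;

  PetscFunctionBegin;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(((PetscObject)A)->comm,"nz used %D nz allocated %D mallocs %D\n",
                     (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.mallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}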
1777 #undef __FUNCT__
1778 #define __FUNCT__ "MatSetOption_MPIAIJ"
1779 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1780 {
1781   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1782   PetscErrorCode ierr;
1783 
1784   PetscFunctionBegin;
1785   switch (op) {
1786   case MAT_NEW_NONZERO_LOCATIONS:
1787   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1788   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1789   case MAT_KEEP_NONZERO_PATTERN:
1790   case MAT_NEW_NONZERO_LOCATION_ERR:
1791   case MAT_USE_INODES:
1792   case MAT_IGNORE_ZERO_ENTRIES:
1793     MatCheckPreallocated(A,1);
1794     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1795     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1796     break;
1797   case MAT_ROW_ORIENTED:
1798     a->roworiented = flg;
1799     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1800     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1801     break;
1802   case MAT_NEW_DIAGONALS:
1803     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1804     break;
1805   case MAT_IGNORE_OFF_PROC_ENTRIES:
1806     a->donotstash = flg;
1807     break;
1808   case MAT_SPD:
1809     A->spd_set                         = PETSC_TRUE;
1810     A->spd                             = flg;
1811     if (flg) {
1812       A->symmetric                     = PETSC_TRUE;
1813       A->structurally_symmetric        = PETSC_TRUE;
1814       A->symmetric_set                 = PETSC_TRUE;
1815       A->structurally_symmetric_set    = PETSC_TRUE;
1816     }
1817     break;
1818   case MAT_SYMMETRIC:
1819     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1820     break;
1821   case MAT_STRUCTURALLY_SYMMETRIC:
1822     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1823     break;
1824   case MAT_HERMITIAN:
1825     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1826     break;
1827   case MAT_SYMMETRY_ETERNAL:
1828     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1829     break;
1830   default:
1831     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1832   }
1833   PetscFunctionReturn(0);
1834 }
1835 
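/*
   Usage sketch (illustrative only): options that MatSetOption_MPIAIJ() forwards to both the
   diagonal (a->A) and off-diagonal (a->B) blocks, plus one that only sets a flag on the
   parallel matrix itself.
*/
static PetscErrorCode ExampleConfigureOptions(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(A,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);    /* forwarded to a->A and a->B */
  ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); /* only sets a->donotstash   */
  ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);                     /* also marks A symmetric    */
  PetscFunctionReturn(0);
}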
1836 #undef __FUNCT__
1837 #define __FUNCT__ "MatGetRow_MPIAIJ"
1838 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1839 {
1840   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1841   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1842   PetscErrorCode ierr;
1843   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1844   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1845   PetscInt       *cmap,*idx_p;
1846 
1847   PetscFunctionBegin;
1848   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1849   mat->getrowactive = PETSC_TRUE;
1850 
1851   if (!mat->rowvalues && (idx || v)) {
1852     /*
1853         allocate enough space to hold information from the longest row.
1854     */
1855     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1856     PetscInt   max = 1,tmp;
1857     for (i=0; i<matin->rmap->n; i++) {
1858       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1859       if (max < tmp) { max = tmp; }
1860     }
1861     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1862   }
1863 
1864   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1865   lrow = row - rstart;
1866 
1867   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1868   if (!v)   {pvA = 0; pvB = 0;}
1869   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1870   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1871   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1872   nztot = nzA + nzB;
1873 
1874   cmap  = mat->garray;
1875   if (v  || idx) {
1876     if (nztot) {
1877       /* Sort by increasing column numbers, assuming A and B already sorted */
1878       PetscInt imark = -1;
1879       if (v) {
1880         *v = v_p = mat->rowvalues;
1881         for (i=0; i<nzB; i++) {
1882           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1883           else break;
1884         }
1885         imark = i;
1886         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1887         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1888       }
1889       if (idx) {
1890         *idx = idx_p = mat->rowindices;
1891         if (imark > -1) {
1892           for (i=0; i<imark; i++) {
1893             idx_p[i] = cmap[cworkB[i]];
1894           }
1895         } else {
1896           for (i=0; i<nzB; i++) {
1897             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1898             else break;
1899           }
1900           imark = i;
1901         }
1902         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1903         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1904       }
1905     } else {
1906       if (idx) *idx = 0;
1907       if (v)   *v   = 0;
1908     }
1909   }
1910   *nz = nztot;
1911   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1912   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1913   PetscFunctionReturn(0);
1914 }
1915 
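/*
   Usage sketch (illustrative only): walking the locally owned rows through the public
   MatGetRow()/MatRestoreRow() pair, which for MATMPIAIJ lands in the two routines above.
   Only rows in [rstart,rend) may be requested; the helper name is hypothetical.
*/
static PetscErrorCode ExampleSumLocalRows(Mat A,PetscScalar *total)
{
  PetscErrorCode    ierr;
  PetscInt          row,rstart,rend,ncols,i;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  *total = 0.0;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    for (i=0; i<ncols; i++) *total += vals[i];
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}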
1916 #undef __FUNCT__
1917 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1918 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1919 {
1920   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1921 
1922   PetscFunctionBegin;
1923   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1924   aij->getrowactive = PETSC_FALSE;
1925   PetscFunctionReturn(0);
1926 }
1927 
1928 #undef __FUNCT__
1929 #define __FUNCT__ "MatNorm_MPIAIJ"
1930 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1931 {
1932   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1933   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1934   PetscErrorCode ierr;
1935   PetscInt       i,j,cstart = mat->cmap->rstart;
1936   PetscReal      sum = 0.0;
1937   MatScalar      *v;
1938 
1939   PetscFunctionBegin;
1940   if (aij->size == 1) {
1941     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1942   } else {
1943     if (type == NORM_FROBENIUS) {
1944       v = amat->a;
1945       for (i=0; i<amat->nz; i++) {
1946 #if defined(PETSC_USE_COMPLEX)
1947         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1948 #else
1949         sum += (*v)*(*v); v++;
1950 #endif
1951       }
1952       v = bmat->a;
1953       for (i=0; i<bmat->nz; i++) {
1954 #if defined(PETSC_USE_COMPLEX)
1955         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1956 #else
1957         sum += (*v)*(*v); v++;
1958 #endif
1959       }
1960       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1961       *norm = PetscSqrtReal(*norm);
1962     } else if (type == NORM_1) { /* max column norm */
1963       PetscReal *tmp,*tmp2;
1964       PetscInt  *jj,*garray = aij->garray;
1965       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1966       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1967       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1968       *norm = 0.0;
1969       v = amat->a; jj = amat->j;
1970       for (j=0; j<amat->nz; j++) {
1971         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1972       }
1973       v = bmat->a; jj = bmat->j;
1974       for (j=0; j<bmat->nz; j++) {
1975         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1976       }
1977       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1978       for (j=0; j<mat->cmap->N; j++) {
1979         if (tmp2[j] > *norm) *norm = tmp2[j];
1980       }
1981       ierr = PetscFree(tmp);CHKERRQ(ierr);
1982       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1983     } else if (type == NORM_INFINITY) { /* max row norm */
1984       PetscReal ntemp = 0.0;
1985       for (j=0; j<aij->A->rmap->n; j++) {
1986         v = amat->a + amat->i[j];
1987         sum = 0.0;
1988         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1989           sum += PetscAbsScalar(*v); v++;
1990         }
1991         v = bmat->a + bmat->i[j];
1992         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1993           sum += PetscAbsScalar(*v); v++;
1994         }
1995         if (sum > ntemp) ntemp = sum;
1996       }
1997       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1998     } else {
1999       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2000     }
2001   }
2002   PetscFunctionReturn(0);
2003 }
2004 
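/*
   Usage sketch (illustrative only): the three norms implemented by MatNorm_MPIAIJ();
   NORM_2 is rejected with PETSC_ERR_SUP, so it is not requested here.
*/
static PetscErrorCode ExamplePrintMatrixNorms(Mat A)
{
  PetscErrorCode ierr;
  PetscReal      fro,one,inf;

  PetscFunctionBegin;
  ierr = MatNorm(A,NORM_FROBENIUS,&fro);CHKERRQ(ierr);
  ierr = MatNorm(A,NORM_1,&one);CHKERRQ(ierr);        /* largest column sum of absolute values */
  ierr = MatNorm(A,NORM_INFINITY,&inf);CHKERRQ(ierr); /* largest row sum of absolute values    */
  ierr = PetscPrintf(((PetscObject)A)->comm,"Frobenius %G  one-norm %G  infinity-norm %G\n",fro,one,inf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}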
2005 #undef __FUNCT__
2006 #define __FUNCT__ "MatTranspose_MPIAIJ"
2007 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2008 {
2009   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2010   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2011   PetscErrorCode ierr;
2012   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2013   PetscInt       cstart=A->cmap->rstart,ncol;
2014   Mat            B;
2015   MatScalar      *array;
2016 
2017   PetscFunctionBegin;
2018   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2019 
2020   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2021   ai = Aloc->i; aj = Aloc->j;
2022   bi = Bloc->i; bj = Bloc->j;
2023   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2024     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2025     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2026     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2027     for (i=0; i<ai[ma]; i++){
2028       d_nnz[aj[i]] ++;
2029       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2030     }
2031 
2032     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2033     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2034     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr);
2035     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2036     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2037     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
2038     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2039   } else {
2040     B = *matout;
2041     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2042     for (i=0; i<ai[ma]; i++){
2043       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2044     }
2045   }
2046 
2047   /* copy over the A part */
2048   array = Aloc->a;
2049   row = A->rmap->rstart;
2050   for (i=0; i<ma; i++) {
2051     ncol = ai[i+1]-ai[i];
2052     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2053     row++; array += ncol; aj += ncol;
2054   }
2055   aj = Aloc->j;
2056   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col index */
2057 
2058   /* copy over the B part */
2059   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2060   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2061   array = Bloc->a;
2062   row = A->rmap->rstart;
2063   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2064   cols_tmp = cols;
2065   for (i=0; i<mb; i++) {
2066     ncol = bi[i+1]-bi[i];
2067     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2068     row++; array += ncol; cols_tmp += ncol;
2069   }
2070   ierr = PetscFree(cols);CHKERRQ(ierr);
2071 
2072   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2073   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2074   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2075     *matout = B;
2076   } else {
2077     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2078   }
2079   PetscFunctionReturn(0);
2080 }
2081 
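/*
   Usage sketch (illustrative only): out-of-place and in-place transposition through the
   public entry point that dispatches to MatTranspose_MPIAIJ() above.  The in-place form
   requires a square matrix, as checked at the top of the routine.
*/
static PetscErrorCode ExampleTranspose(Mat A)
{
  PetscErrorCode ierr;
  Mat            At;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); /* new matrix holding A^T */
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);    /* in-place transpose of A */
  PetscFunctionReturn(0);
}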
2082 #undef __FUNCT__
2083 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2084 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2085 {
2086   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2087   Mat            a = aij->A,b = aij->B;
2088   PetscErrorCode ierr;
2089   PetscInt       s1,s2,s3;
2090 
2091   PetscFunctionBegin;
2092   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2093   if (rr) {
2094     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2095     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2096     /* Overlap communication with computation. */
2097     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2098   }
2099   if (ll) {
2100     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2101     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2102     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2103   }
2104   /* scale  the diagonal block */
2105   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2106 
2107   if (rr) {
2108     /* Do a scatter end and then right scale the off-diagonal block */
2109     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2110     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2111   }
2112 
2113   PetscFunctionReturn(0);
2114 }
2115 
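/*
   Usage sketch (illustrative only): scaling A <- diag(l) A diag(r).  The left vector must
   match the row layout and the right vector the column layout, which is what the local
   size checks in MatDiagonalScale_MPIAIJ() enforce; MatGetVecs() produces conforming vectors.
*/
static PetscErrorCode ExampleScaleRowsAndColumns(Mat A)
{
  PetscErrorCode ierr;
  Vec            l,r;

  PetscFunctionBegin;
  ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr); /* r conforms to the columns, l to the rows */
  ierr = VecSet(l,2.0);CHKERRQ(ierr);
  ierr = VecSet(r,0.5);CHKERRQ(ierr);
  ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
  ierr = VecDestroy(&l);CHKERRQ(ierr);
  ierr = VecDestroy(&r);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}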
2116 #undef __FUNCT__
2117 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2118 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2119 {
2120   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2121   PetscErrorCode ierr;
2122 
2123   PetscFunctionBegin;
2124   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2125   PetscFunctionReturn(0);
2126 }
2127 
2128 #undef __FUNCT__
2129 #define __FUNCT__ "MatEqual_MPIAIJ"
2130 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2131 {
2132   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2133   Mat            a,b,c,d;
2134   PetscBool      flg;
2135   PetscErrorCode ierr;
2136 
2137   PetscFunctionBegin;
2138   a = matA->A; b = matA->B;
2139   c = matB->A; d = matB->B;
2140 
2141   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2142   if (flg) {
2143     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2144   }
2145   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2146   PetscFunctionReturn(0);
2147 }
2148 
2149 #undef __FUNCT__
2150 #define __FUNCT__ "MatCopy_MPIAIJ"
2151 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2152 {
2153   PetscErrorCode ierr;
2154   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2155   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2156 
2157   PetscFunctionBegin;
2158   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2159   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2160     /* because of the column compression in the off-processor part of the matrix a->B,
2161        the number of columns in a->B and b->B may be different, hence we cannot call
2162        the MatCopy() directly on the two parts. If need be, we can provide a more
2163        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2164        then copying the submatrices */
2165     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2166   } else {
2167     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2168     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2169   }
2170   PetscFunctionReturn(0);
2171 }
2172 
2173 #undef __FUNCT__
2174 #define __FUNCT__ "MatSetUp_MPIAIJ"
2175 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2176 {
2177   PetscErrorCode ierr;
2178 
2179   PetscFunctionBegin;
2180   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2181   PetscFunctionReturn(0);
2182 }
2183 
2184 #undef __FUNCT__
2185 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2186 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2187 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2188 {
2189   PetscInt          i,m=Y->rmap->N;
2190   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2191   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2192   const PetscInt    *xi = x->i,*yi = y->i;
2193 
2194   PetscFunctionBegin;
2195   /* Set the number of nonzeros in the new matrix */
2196   for(i=0; i<m; i++) {
2197     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2198     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2199     nnz[i] = 0;
2200     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2201       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2202       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2203       nnz[i]++;
2204     }
2205     for (; k<nzy; k++) nnz[i]++;
2206   }
2207   PetscFunctionReturn(0);
2208 }
2209 
2210 #undef __FUNCT__
2211 #define __FUNCT__ "MatAXPY_MPIAIJ"
2212 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2213 {
2214   PetscErrorCode ierr;
2215   PetscInt       i;
2216   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2217   PetscBLASInt   bnz,one=1;
2218   Mat_SeqAIJ     *x,*y;
2219 
2220   PetscFunctionBegin;
2221   if (str == SAME_NONZERO_PATTERN) {
2222     PetscScalar alpha = a;
2223     x = (Mat_SeqAIJ *)xx->A->data;
2224     y = (Mat_SeqAIJ *)yy->A->data;
2225     bnz = PetscBLASIntCast(x->nz);
2226     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2227     x = (Mat_SeqAIJ *)xx->B->data;
2228     y = (Mat_SeqAIJ *)yy->B->data;
2229     bnz = PetscBLASIntCast(x->nz);
2230     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2231   } else if (str == SUBSET_NONZERO_PATTERN) {
2232     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2233 
2234     x = (Mat_SeqAIJ *)xx->B->data;
2235     y = (Mat_SeqAIJ *)yy->B->data;
2236     if (y->xtoy && y->XtoY != xx->B) {
2237       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2238       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2239     }
2240     if (!y->xtoy) { /* get xtoy */
2241       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2242       y->XtoY = xx->B;
2243       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2244     }
2245     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2246   } else {
2247     Mat B;
2248     PetscInt *nnz_d,*nnz_o;
2249     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2250     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2251     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2252     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2253     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2254     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2255     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2256     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2257     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2258     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2259     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2260     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2261     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2262     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2263   }
2264   PetscFunctionReturn(0);
2265 }
2266 
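/*
   Usage sketch (illustrative only): Y <- Y + a*X through the public MatAXPY(), which lands in
   MatAXPY_MPIAIJ() above.  DIFFERENT_NONZERO_PATTERN always works but rebuilds Y with fresh
   preallocation; SAME_NONZERO_PATTERN and SUBSET_NONZERO_PATTERN are cheaper when they apply.
*/
static PetscErrorCode ExampleAddScaledMatrix(Mat Y,Mat X)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}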
2267 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2268 
2269 #undef __FUNCT__
2270 #define __FUNCT__ "MatConjugate_MPIAIJ"
2271 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2272 {
2273 #if defined(PETSC_USE_COMPLEX)
2274   PetscErrorCode ierr;
2275   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2276 
2277   PetscFunctionBegin;
2278   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2279   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2280 #else
2281   PetscFunctionBegin;
2282 #endif
2283   PetscFunctionReturn(0);
2284 }
2285 
2286 #undef __FUNCT__
2287 #define __FUNCT__ "MatRealPart_MPIAIJ"
2288 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2289 {
2290   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2291   PetscErrorCode ierr;
2292 
2293   PetscFunctionBegin;
2294   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2295   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2296   PetscFunctionReturn(0);
2297 }
2298 
2299 #undef __FUNCT__
2300 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2301 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2302 {
2303   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2304   PetscErrorCode ierr;
2305 
2306   PetscFunctionBegin;
2307   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2308   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2309   PetscFunctionReturn(0);
2310 }
2311 
2312 #ifdef PETSC_HAVE_PBGL
2313 
2314 #include <boost/parallel/mpi/bsp_process_group.hpp>
2315 #include <boost/graph/distributed/ilu_default_graph.hpp>
2316 #include <boost/graph/distributed/ilu_0_block.hpp>
2317 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2318 #include <boost/graph/distributed/petsc/interface.hpp>
2319 #include <boost/multi_array.hpp>
2320 #include <boost/parallel/distributed_property_map.hpp>
2321 
2322 #undef __FUNCT__
2323 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2324 /*
2325   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2326 */
2327 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2328 {
2329   namespace petsc = boost::distributed::petsc;
2330 
2331   namespace graph_dist = boost::graph::distributed;
2332   using boost::graph::distributed::ilu_default::process_group_type;
2333   using boost::graph::ilu_permuted;
2334 
2335   PetscBool       row_identity, col_identity;
2336   PetscContainer  c;
2337   PetscInt        m, n, M, N;
2338   PetscErrorCode  ierr;
2339 
2340   PetscFunctionBegin;
2341   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2342   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2343   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2344   if (!row_identity || !col_identity) {
2345     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2346   }
2347 
2348   process_group_type pg;
2349   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2350   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2351   lgraph_type&   level_graph = *lgraph_p;
2352   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2353 
2354   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2355   ilu_permuted(level_graph);
2356 
2357   /* put together the new matrix */
2358   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2359   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2360   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2361   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2362   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr);
2363   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2364   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2365   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2366 
2367   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2368   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2369   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2370   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2371   PetscFunctionReturn(0);
2372 }
2373 
2374 #undef __FUNCT__
2375 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2376 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2377 {
2378   PetscFunctionBegin;
2379   PetscFunctionReturn(0);
2380 }
2381 
2382 #undef __FUNCT__
2383 #define __FUNCT__ "MatSolve_MPIAIJ"
2384 /*
2385   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2386 */
2387 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2388 {
2389   namespace graph_dist = boost::graph::distributed;
2390 
2391   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2392   lgraph_type*   lgraph_p;
2393   PetscContainer c;
2394   PetscErrorCode ierr;
2395 
2396   PetscFunctionBegin;
2397   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2398   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2399   ierr = VecCopy(b, x);CHKERRQ(ierr);
2400 
2401   PetscScalar* array_x;
2402   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2403   PetscInt sx;
2404   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2405 
2406   PetscScalar* array_b;
2407   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2408   PetscInt sb;
2409   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2410 
2411   lgraph_type&   level_graph = *lgraph_p;
2412   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2413 
2414   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2415   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2416                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2417 
2418   typedef boost::iterator_property_map<array_ref_type::iterator,
2419                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2420   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2421                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2422 
2423   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2424 
2425   PetscFunctionReturn(0);
2426 }
2427 #endif
2428 
2429 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2430   PetscInt       nzlocal,nsends,nrecvs;
2431   PetscMPIInt    *send_rank,*recv_rank;
2432   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2433   PetscScalar    *sbuf_a,**rbuf_a;
2434   PetscErrorCode (*Destroy)(Mat);
2435 } Mat_Redundant;
2436 
2437 #undef __FUNCT__
2438 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2439 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2440 {
2441   PetscErrorCode       ierr;
2442   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2443   PetscInt             i;
2444 
2445   PetscFunctionBegin;
2446   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2447   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2448   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2449   for (i=0; i<redund->nrecvs; i++){
2450     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2451     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2452   }
2453   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2454   ierr = PetscFree(redund);CHKERRQ(ierr);
2455   PetscFunctionReturn(0);
2456 }
2457 
2458 #undef __FUNCT__
2459 #define __FUNCT__ "MatDestroy_MatRedundant"
2460 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2461 {
2462   PetscErrorCode  ierr;
2463   PetscContainer  container;
2464   Mat_Redundant   *redund=PETSC_NULL;
2465 
2466   PetscFunctionBegin;
2467   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2468   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2469   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2470   A->ops->destroy = redund->Destroy;
2471   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2472   if (A->ops->destroy) {
2473     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2474   }
2475   PetscFunctionReturn(0);
2476 }
2477 
2478 #undef __FUNCT__
2479 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2480 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2481 {
2482   PetscMPIInt    rank,size;
2483   MPI_Comm       comm=((PetscObject)mat)->comm;
2484   PetscErrorCode ierr;
2485   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2486   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2487   PetscInt       *rowrange=mat->rmap->range;
2488   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2489   Mat            A=aij->A,B=aij->B,C=*matredundant;
2490   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2491   PetscScalar    *sbuf_a;
2492   PetscInt       nzlocal=a->nz+b->nz;
2493   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2494   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2495   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2496   MatScalar      *aworkA,*aworkB;
2497   PetscScalar    *vals;
2498   PetscMPIInt    tag1,tag2,tag3,imdex;
2499   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2500                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2501   MPI_Status     recv_status,*send_status;
2502   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2503   PetscInt       **rbuf_j=PETSC_NULL;
2504   PetscScalar    **rbuf_a=PETSC_NULL;
2505   Mat_Redundant  *redund=PETSC_NULL;
2506   PetscContainer container;
2507 
2508   PetscFunctionBegin;
2509   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2510   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2511 
2512   if (reuse == MAT_REUSE_MATRIX) {
2513     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2514     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2515     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2516     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2517     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2518     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2519     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2520     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2521 
2522     nsends    = redund->nsends;
2523     nrecvs    = redund->nrecvs;
2524     send_rank = redund->send_rank;
2525     recv_rank = redund->recv_rank;
2526     sbuf_nz   = redund->sbuf_nz;
2527     rbuf_nz   = redund->rbuf_nz;
2528     sbuf_j    = redund->sbuf_j;
2529     sbuf_a    = redund->sbuf_a;
2530     rbuf_j    = redund->rbuf_j;
2531     rbuf_a    = redund->rbuf_a;
2532   }
2533 
2534   if (reuse == MAT_INITIAL_MATRIX){
2535     PetscMPIInt  subrank,subsize;
2536     PetscInt     nleftover,np_subcomm;
2537     /* get the destination processors' id send_rank, nsends and nrecvs */
2538     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2539     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2540     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2541     np_subcomm = size/nsubcomm;
2542     nleftover  = size - nsubcomm*np_subcomm;
2543     nsends = 0; nrecvs = 0;
2544     for (i=0; i<size; i++){ /* i=rank*/
2545       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2546         send_rank[nsends] = i; nsends++;
2547         recv_rank[nrecvs++] = i;
2548       }
2549     }
2550     if (rank >= size - nleftover){/* this proc is a leftover processor */
2551       i = size-nleftover-1;
2552       j = 0;
2553       while (j < nsubcomm - nleftover){
2554         send_rank[nsends++] = i;
2555         i--; j++;
2556       }
2557     }
2558 
2559     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2560       for (i=0; i<nleftover; i++){
2561         recv_rank[nrecvs++] = size-nleftover+i;
2562       }
2563     }
2564 
2565     /* allocate sbuf_j, sbuf_a */
2566     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2567     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2568     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2569   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2570 
2571   /* copy mat's local entries into the buffers */
2572   if (reuse == MAT_INITIAL_MATRIX){
2573     rownz_max = 0;
2574     rptr = sbuf_j;
2575     cols = sbuf_j + rend-rstart + 1;
2576     vals = sbuf_a;
2577     rptr[0] = 0;
2578     for (i=0; i<rend-rstart; i++){
2579       row = i + rstart;
2580       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2581       ncols  = nzA + nzB;
2582       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2583       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2584       /* load the column indices for this row into cols */
2585       lwrite = 0;
2586       for (l=0; l<nzB; l++) {
2587         if ((ctmp = bmap[cworkB[l]]) < cstart){
2588           vals[lwrite]   = aworkB[l];
2589           cols[lwrite++] = ctmp;
2590         }
2591       }
2592       for (l=0; l<nzA; l++){
2593         vals[lwrite]   = aworkA[l];
2594         cols[lwrite++] = cstart + cworkA[l];
2595       }
2596       for (l=0; l<nzB; l++) {
2597         if ((ctmp = bmap[cworkB[l]]) >= cend){
2598           vals[lwrite]   = aworkB[l];
2599           cols[lwrite++] = ctmp;
2600         }
2601       }
2602       vals += ncols;
2603       cols += ncols;
2604       rptr[i+1] = rptr[i] + ncols;
2605       if (rownz_max < ncols) rownz_max = ncols;
2606     }
2607     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2608   } else { /* only copy matrix values into sbuf_a */
2609     rptr = sbuf_j;
2610     vals = sbuf_a;
2611     rptr[0] = 0;
2612     for (i=0; i<rend-rstart; i++){
2613       row = i + rstart;
2614       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2615       ncols  = nzA + nzB;
2616       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2617       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2618       lwrite = 0;
2619       for (l=0; l<nzB; l++) {
2620         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2621       }
2622       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2623       for (l=0; l<nzB; l++) {
2624         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2625       }
2626       vals += ncols;
2627       rptr[i+1] = rptr[i] + ncols;
2628     }
2629   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2630 
2631   /* send nzlocal to others, and recv other's nzlocal */
2632   /*--------------------------------------------------*/
2633   if (reuse == MAT_INITIAL_MATRIX){
2634     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2635     s_waits2 = s_waits3 + nsends;
2636     s_waits1 = s_waits2 + nsends;
2637     r_waits1 = s_waits1 + nsends;
2638     r_waits2 = r_waits1 + nrecvs;
2639     r_waits3 = r_waits2 + nrecvs;
2640   } else {
2641     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2642     r_waits3 = s_waits3 + nsends;
2643   }
2644 
2645   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2646   if (reuse == MAT_INITIAL_MATRIX){
2647     /* get new tags to keep the communication clean */
2648     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2649     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2650     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2651 
2652     /* post receives of other's nzlocal */
2653     for (i=0; i<nrecvs; i++){
2654       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2655     }
2656     /* send nzlocal to others */
2657     for (i=0; i<nsends; i++){
2658       sbuf_nz[i] = nzlocal;
2659       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2660     }
2661     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2662     count = nrecvs;
2663     while (count) {
2664       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2665       recv_rank[imdex] = recv_status.MPI_SOURCE;
2666       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2667       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2668 
2669       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2670       rbuf_nz[imdex] += i + 2;
2671       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2672       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2673       count--;
2674     }
2675     /* wait on sends of nzlocal */
2676     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2677     /* send mat->i,j to others, and recv from others */
2678     /*------------------------------------------------*/
2679     for (i=0; i<nsends; i++){
2680       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2681       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2682     }
2683     /* wait on receives of mat->i,j */
2684     /*------------------------------*/
2685     count = nrecvs;
2686     while (count) {
2687       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2688       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2689       count--;
2690     }
2691     /* wait on sends of mat->i,j */
2692     /*---------------------------*/
2693     if (nsends) {
2694       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2695     }
2696   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2697 
2698   /* post receives, send and receive mat->a */
2699   /*----------------------------------------*/
2700   for (imdex=0; imdex<nrecvs; imdex++) {
2701     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2702   }
2703   for (i=0; i<nsends; i++){
2704     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2705   }
2706   count = nrecvs;
2707   while (count) {
2708     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2709     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2710     count--;
2711   }
2712   if (nsends) {
2713     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2714   }
2715 
2716   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2717 
2718   /* create redundant matrix */
2719   /*-------------------------*/
2720   if (reuse == MAT_INITIAL_MATRIX){
2721     /* compute rownz_max for preallocation */
2722     for (imdex=0; imdex<nrecvs; imdex++){
2723       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2724       rptr = rbuf_j[imdex];
2725       for (i=0; i<j; i++){
2726         ncols = rptr[i+1] - rptr[i];
2727         if (rownz_max < ncols) rownz_max = ncols;
2728       }
2729     }
2730 
2731     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2732     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2733     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr);
2734     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2735     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2736     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2737   } else {
2738     C = *matredundant;
2739   }
2740 
2741   /* insert local matrix entries */
2742   rptr = sbuf_j;
2743   cols = sbuf_j + rend-rstart + 1;
2744   vals = sbuf_a;
2745   for (i=0; i<rend-rstart; i++){
2746     row   = i + rstart;
2747     ncols = rptr[i+1] - rptr[i];
2748     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2749     vals += ncols;
2750     cols += ncols;
2751   }
2752   /* insert received matrix entries */
2753   for (imdex=0; imdex<nrecvs; imdex++){
2754     rstart = rowrange[recv_rank[imdex]];
2755     rend   = rowrange[recv_rank[imdex]+1];
2756     rptr = rbuf_j[imdex];
2757     cols = rbuf_j[imdex] + rend-rstart + 1;
2758     vals = rbuf_a[imdex];
2759     for (i=0; i<rend-rstart; i++){
2760       row   = i + rstart;
2761       ncols = rptr[i+1] - rptr[i];
2762       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2763       vals += ncols;
2764       cols += ncols;
2765     }
2766   }
2767   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2768   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2769   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2770   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2771   if (reuse == MAT_INITIAL_MATRIX) {
2772     PetscContainer container;
2773     *matredundant = C;
2774     /* create a supporting struct and attach it to C for reuse */
2775     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2776     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2777     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2778     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2779     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2780     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2781 
2782     redund->nzlocal = nzlocal;
2783     redund->nsends  = nsends;
2784     redund->nrecvs  = nrecvs;
2785     redund->send_rank = send_rank;
2786     redund->recv_rank = recv_rank;
2787     redund->sbuf_nz = sbuf_nz;
2788     redund->rbuf_nz = rbuf_nz;
2789     redund->sbuf_j  = sbuf_j;
2790     redund->sbuf_a  = sbuf_a;
2791     redund->rbuf_j  = rbuf_j;
2792     redund->rbuf_a  = rbuf_a;
2793 
2794     redund->Destroy = C->ops->destroy;
2795     C->ops->destroy = MatDestroy_MatRedundant;
2796   }
2797   PetscFunctionReturn(0);
2798 }
2799 
2800 #undef __FUNCT__
2801 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2802 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2803 {
2804   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2805   PetscErrorCode ierr;
2806   PetscInt       i,*idxb = 0;
2807   PetscScalar    *va,*vb;
2808   Vec            vtmp;
2809 
2810   PetscFunctionBegin;
2811   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2812   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2813   if (idx) {
2814     for (i=0; i<A->rmap->n; i++) {
2815       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2816     }
2817   }
2818 
2819   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2820   if (idx) {
2821     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2822   }
2823   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2824   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2825 
2826   for (i=0; i<A->rmap->n; i++){
2827     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2828       va[i] = vb[i];
2829       if (idx) idx[i] = a->garray[idxb[i]];
2830     }
2831   }
2832 
2833   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2834   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2835   ierr = PetscFree(idxb);CHKERRQ(ierr);
2836   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2837   PetscFunctionReturn(0);
2838 }
2839 
2840 #undef __FUNCT__
2841 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2842 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2843 {
2844   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2845   PetscErrorCode ierr;
2846   PetscInt       i,*idxb = 0;
2847   PetscScalar    *va,*vb;
2848   Vec            vtmp;
2849 
2850   PetscFunctionBegin;
2851   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2852   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2853   if (idx) {
2854     for (i=0; i<A->rmap->n; i++) {
2855       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2856     }
2857   }
2858 
2859   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2860   if (idx) {
2861     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2862   }
2863   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2864   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2865 
2866   for (i=0; i<A->rmap->n; i++){
2867     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2868       va[i] = vb[i];
2869       if (idx) idx[i] = a->garray[idxb[i]];
2870     }
2871   }
2872 
2873   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2874   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2875   ierr = PetscFree(idxb);CHKERRQ(ierr);
2876   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2877   PetscFunctionReturn(0);
2878 }
2879 
2880 #undef __FUNCT__
2881 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2882 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2883 {
2884   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2885   PetscInt       n      = A->rmap->n;
2886   PetscInt       cstart = A->cmap->rstart;
2887   PetscInt      *cmap   = mat->garray;
2888   PetscInt      *diagIdx, *offdiagIdx;
2889   Vec            diagV, offdiagV;
2890   PetscScalar   *a, *diagA, *offdiagA;
2891   PetscInt       r;
2892   PetscErrorCode ierr;
2893 
2894   PetscFunctionBegin;
2895   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2896   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2897   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2898   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2899   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2900   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2901   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2902   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2903   for(r = 0; r < n; ++r) {
2904     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2905       a[r]   = diagA[r];
2906       idx[r] = cstart + diagIdx[r];
2907     } else {
2908       a[r]   = offdiagA[r];
2909       idx[r] = cmap[offdiagIdx[r]];
2910     }
2911   }
2912   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2913   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2914   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2915   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2916   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2917   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2918   PetscFunctionReturn(0);
2919 }
2920 
2921 #undef __FUNCT__
2922 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2923 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2924 {
2925   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2926   PetscInt       n      = A->rmap->n;
2927   PetscInt       cstart = A->cmap->rstart;
2928   PetscInt      *cmap   = mat->garray;
2929   PetscInt      *diagIdx, *offdiagIdx;
2930   Vec            diagV, offdiagV;
2931   PetscScalar   *a, *diagA, *offdiagA;
2932   PetscInt       r;
2933   PetscErrorCode ierr;
2934 
2935   PetscFunctionBegin;
2936   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2937   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2938   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2939   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2940   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2941   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2942   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2943   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2944   for(r = 0; r < n; ++r) {
2945     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2946       a[r]   = diagA[r];
2947       idx[r] = cstart + diagIdx[r];
2948     } else {
2949       a[r]   = offdiagA[r];
2950       idx[r] = cmap[offdiagIdx[r]];
2951     }
2952   }
2953   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2954   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2955   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2956   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2957   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2958   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2959   PetscFunctionReturn(0);
2960 }
2961 
2962 #undef __FUNCT__
2963 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2964 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2965 {
2966   PetscErrorCode ierr;
2967   Mat            *dummy;
2968 
2969   PetscFunctionBegin;
2970   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2971   *newmat = *dummy;
2972   ierr = PetscFree(dummy);CHKERRQ(ierr);
2973   PetscFunctionReturn(0);
2974 }
2975 
2976 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2977 
2978 #undef __FUNCT__
2979 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2980 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2981 {
2982   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2983   PetscErrorCode ierr;
2984 
2985   PetscFunctionBegin;
2986   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2987   PetscFunctionReturn(0);
2988 }
2989 
2990 
2991 /* -------------------------------------------------------------------*/
2992 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2993        MatGetRow_MPIAIJ,
2994        MatRestoreRow_MPIAIJ,
2995        MatMult_MPIAIJ,
2996 /* 4*/ MatMultAdd_MPIAIJ,
2997        MatMultTranspose_MPIAIJ,
2998        MatMultTransposeAdd_MPIAIJ,
2999 #ifdef PETSC_HAVE_PBGL
3000        MatSolve_MPIAIJ,
3001 #else
3002        0,
3003 #endif
3004        0,
3005        0,
3006 /*10*/ 0,
3007        0,
3008        0,
3009        MatSOR_MPIAIJ,
3010        MatTranspose_MPIAIJ,
3011 /*15*/ MatGetInfo_MPIAIJ,
3012        MatEqual_MPIAIJ,
3013        MatGetDiagonal_MPIAIJ,
3014        MatDiagonalScale_MPIAIJ,
3015        MatNorm_MPIAIJ,
3016 /*20*/ MatAssemblyBegin_MPIAIJ,
3017        MatAssemblyEnd_MPIAIJ,
3018        MatSetOption_MPIAIJ,
3019        MatZeroEntries_MPIAIJ,
3020 /*24*/ MatZeroRows_MPIAIJ,
3021        0,
3022 #ifdef PETSC_HAVE_PBGL
3023        0,
3024 #else
3025        0,
3026 #endif
3027        0,
3028        0,
3029 /*29*/ MatSetUp_MPIAIJ,
3030 #ifdef PETSC_HAVE_PBGL
3031        0,
3032 #else
3033        0,
3034 #endif
3035        0,
3036        0,
3037        0,
3038 /*34*/ MatDuplicate_MPIAIJ,
3039        0,
3040        0,
3041        0,
3042        0,
3043 /*39*/ MatAXPY_MPIAIJ,
3044        MatGetSubMatrices_MPIAIJ,
3045        MatIncreaseOverlap_MPIAIJ,
3046        MatGetValues_MPIAIJ,
3047        MatCopy_MPIAIJ,
3048 /*44*/ MatGetRowMax_MPIAIJ,
3049        MatScale_MPIAIJ,
3050        0,
3051        0,
3052        MatZeroRowsColumns_MPIAIJ,
3053 /*49*/ 0,
3054        0,
3055        0,
3056        0,
3057        0,
3058 /*54*/ MatFDColoringCreate_MPIAIJ,
3059        0,
3060        MatSetUnfactored_MPIAIJ,
3061        0, /* MatPermute_MPIAIJ, impl currently broken */
3062        0,
3063 /*59*/ MatGetSubMatrix_MPIAIJ,
3064        MatDestroy_MPIAIJ,
3065        MatView_MPIAIJ,
3066        0,
3067        0,
3068 /*64*/ 0,
3069        0,
3070        0,
3071        0,
3072        0,
3073 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3074        MatGetRowMinAbs_MPIAIJ,
3075        0,
3076        MatSetColoring_MPIAIJ,
3077 #if defined(PETSC_HAVE_ADIC)
3078        MatSetValuesAdic_MPIAIJ,
3079 #else
3080        0,
3081 #endif
3082        MatSetValuesAdifor_MPIAIJ,
3083 /*75*/ MatFDColoringApply_AIJ,
3084        0,
3085        0,
3086        0,
3087        0,
3088 /*80*/ 0,
3089        0,
3090        0,
3091 /*83*/ MatLoad_MPIAIJ,
3092        0,
3093        0,
3094        0,
3095        0,
3096        0,
3097 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3098        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3099        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3100        MatPtAP_Basic,
3101        MatPtAPSymbolic_MPIAIJ,
3102 /*94*/ MatPtAPNumeric_MPIAIJ,
3103        0,
3104        0,
3105        0,
3106        0,
3107 /*99*/ 0,
3108        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3109        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3110        MatConjugate_MPIAIJ,
3111        0,
3112 /*104*/MatSetValuesRow_MPIAIJ,
3113        MatRealPart_MPIAIJ,
3114        MatImaginaryPart_MPIAIJ,
3115        0,
3116        0,
3117 /*109*/0,
3118        MatGetRedundantMatrix_MPIAIJ,
3119        MatGetRowMin_MPIAIJ,
3120        0,
3121        0,
3122 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3123        0,
3124        0,
3125        0,
3126        0,
3127 /*119*/0,
3128        0,
3129        0,
3130        0,
3131        MatGetMultiProcBlock_MPIAIJ,
3132 /*124*/MatFindNonZeroRows_MPIAIJ,
3133        MatGetColumnNorms_MPIAIJ,
3134        MatInvertBlockDiagonal_MPIAIJ,
3135        0,
3136        MatGetSubMatricesParallel_MPIAIJ,
3137 /*129*/0,
3138        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3139        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3140        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3141        0,
3142 /*134*/0,
3143        0,
3144        0,
3145        0,
3146        0
3147 };
3148 
3149 /* ----------------------------------------------------------------------------------------*/
3150 
3151 EXTERN_C_BEGIN
3152 #undef __FUNCT__
3153 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3154 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3155 {
3156   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3157   PetscErrorCode ierr;
3158 
3159   PetscFunctionBegin;
3160   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3161   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3162   PetscFunctionReturn(0);
3163 }
3164 EXTERN_C_END
3165 
3166 EXTERN_C_BEGIN
3167 #undef __FUNCT__
3168 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3169 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3170 {
3171   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3172   PetscErrorCode ierr;
3173 
3174   PetscFunctionBegin;
3175   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3176   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3177   PetscFunctionReturn(0);
3178 }
3179 EXTERN_C_END
3180 
3181 EXTERN_C_BEGIN
3182 #undef __FUNCT__
3183 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3184 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3185 {
3186   Mat_MPIAIJ     *b;
3187   PetscErrorCode ierr;
3188   PetscInt       i;
3189   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3190 
3191   PetscFunctionBegin;
3192   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3193   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3194   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3195   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3196   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3197   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3198 
3199   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3200   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3201   if (d_nnz) {
3202     for (i=0; i<B->rmap->n; i++) {
3203       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3204     }
3205   }
3206   if (o_nnz) {
3207     for (i=0; i<B->rmap->n; i++) {
3208       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3209     }
3210   }
3211   b = (Mat_MPIAIJ*)B->data;
3212 
3213   if (!B->preallocated) {
3214     /* Explicitly create 2 MATSEQAIJ matrices. */
3215     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3216     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3217     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3218     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3219     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3220     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3221     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3222     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3223     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3224     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3225   }
3226 
3227   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3228   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3229   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3230   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3231   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3232   B->preallocated = PETSC_TRUE;
3233   PetscFunctionReturn(0);
3234 }
3235 EXTERN_C_END
3236 
3237 #undef __FUNCT__
3238 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3239 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3240 {
3241   Mat            mat;
3242   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3243   PetscErrorCode ierr;
3244 
3245   PetscFunctionBegin;
3246   *newmat       = 0;
3247   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3248   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3249   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3250   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3251   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3252   a    = (Mat_MPIAIJ*)mat->data;
3253 
3254   mat->factortype    = matin->factortype;
3255   mat->rmap->bs      = matin->rmap->bs;
3256   mat->cmap->bs      = matin->cmap->bs;
3257   mat->assembled    = PETSC_TRUE;
3258   mat->insertmode   = NOT_SET_VALUES;
3259   mat->preallocated = PETSC_TRUE;
3260 
3261   a->size           = oldmat->size;
3262   a->rank           = oldmat->rank;
3263   a->donotstash     = oldmat->donotstash;
3264   a->roworiented    = oldmat->roworiented;
3265   a->rowindices     = 0;
3266   a->rowvalues      = 0;
3267   a->getrowactive   = PETSC_FALSE;
3268 
3269   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3270   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3271 
3272   if (oldmat->colmap) {
3273 #if defined (PETSC_USE_CTABLE)
3274     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3275 #else
3276     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3277     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3278     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3279 #endif
3280   } else a->colmap = 0;
3281   if (oldmat->garray) {
3282     PetscInt len;
3283     len  = oldmat->B->cmap->n;
3284     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3285     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3286     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3287   } else a->garray = 0;
3288 
3289   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3290   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3291   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3292   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3293   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3294   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3295   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3296   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3297   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3298   *newmat = mat;
3299   PetscFunctionReturn(0);
3300 }
3301 
3302 
3303 
3304 #undef __FUNCT__
3305 #define __FUNCT__ "MatLoad_MPIAIJ"
3306 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3307 {
3308   PetscScalar    *vals,*svals;
3309   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3310   PetscErrorCode ierr;
3311   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3312   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3313   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3314   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3315   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3316   int            fd;
3317 
3318   PetscFunctionBegin;
3319   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3320   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3321   if (!rank) {
3322     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3323     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3324     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3325   }
3326 
3327   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3328 
3329   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3330   M = header[1]; N = header[2];
3331   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3332   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3333   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3334 
3335   /* If global sizes are set, check if they are consistent with that given in the file */
3336   if (sizesset) {
3337     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3338   }
3339   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3340   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3341 
3342   /* determine ownership of all rows */
3343   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3344   else m = newMat->rmap->n; /* Set by user */
3345 
3346   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3347   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3348 
3349   /* First process needs enough room for process with most rows */
3350   if (!rank) {
3351     mmax       = rowners[1];
3352     for (i=2; i<=size; i++) {
3353       mmax = PetscMax(mmax,rowners[i]);
3354     }
3355   } else mmax = m;
3356 
3357   rowners[0] = 0;
3358   for (i=2; i<=size; i++) {
3359     rowners[i] += rowners[i-1];
3360   }
3361   rstart = rowners[rank];
3362   rend   = rowners[rank+1];
3363 
3364   /* distribute row lengths to all processors */
3365   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3366   if (!rank) {
3367     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3368     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3369     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3370     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3371     for (j=0; j<m; j++) {
3372       procsnz[0] += ourlens[j];
3373     }
3374     for (i=1; i<size; i++) {
3375       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3376       /* calculate the number of nonzeros on each processor */
3377       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3378         procsnz[i] += rowlengths[j];
3379       }
3380       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3381     }
3382     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3383   } else {
3384     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3385   }
3386 
3387   if (!rank) {
3388     /* determine max buffer needed and allocate it */
3389     maxnz = 0;
3390     for (i=0; i<size; i++) {
3391       maxnz = PetscMax(maxnz,procsnz[i]);
3392     }
3393     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3394 
3395     /* read in my part of the matrix column indices  */
3396     nz   = procsnz[0];
3397     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3398     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3399 
3400     /* read in everyone else's and ship off */
3401     for (i=1; i<size; i++) {
3402       nz     = procsnz[i];
3403       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3404       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3405     }
3406     ierr = PetscFree(cols);CHKERRQ(ierr);
3407   } else {
3408     /* determine buffer space needed for message */
3409     nz = 0;
3410     for (i=0; i<m; i++) {
3411       nz += ourlens[i];
3412     }
3413     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3414 
3415     /* receive message of column indices*/
3416     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3417   }
3418 
3419   /* determine column ownership if matrix is not square */
3420   if (N != M) {
3421     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3422     else n = newMat->cmap->n;
3423     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3424     cstart = cend - n;
3425   } else {
3426     cstart = rstart;
3427     cend   = rend;
3428     n      = cend - cstart;
3429   }
3430 
3431   /* loop over local rows, determining number of off diagonal entries */
3432   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3433   jj = 0;
3434   for (i=0; i<m; i++) {
3435     for (j=0; j<ourlens[i]; j++) {
3436       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3437       jj++;
3438     }
3439   }
3440 
3441   for (i=0; i<m; i++) {
3442     ourlens[i] -= offlens[i];
3443   }
3444   if (!sizesset) {
3445     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3446   }
3447   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3448 
3449   for (i=0; i<m; i++) {
3450     ourlens[i] += offlens[i];
3451   }
3452 
3453   if (!rank) {
3454     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3455 
3456     /* read in my part of the matrix numerical values  */
3457     nz   = procsnz[0];
3458     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3459 
3460     /* insert into matrix */
3461     jj      = rstart;
3462     smycols = mycols;
3463     svals   = vals;
3464     for (i=0; i<m; i++) {
3465       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3466       smycols += ourlens[i];
3467       svals   += ourlens[i];
3468       jj++;
3469     }
3470 
3471     /* read in other processors and ship out */
3472     for (i=1; i<size; i++) {
3473       nz     = procsnz[i];
3474       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3475       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3476     }
3477     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3478   } else {
3479     /* receive numeric values */
3480     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3481 
3482     /* receive message of values*/
3483     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3484 
3485     /* insert into matrix */
3486     jj      = rstart;
3487     smycols = mycols;
3488     svals   = vals;
3489     for (i=0; i<m; i++) {
3490       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3491       smycols += ourlens[i];
3492       svals   += ourlens[i];
3493       jj++;
3494     }
3495   }
3496   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3497   ierr = PetscFree(vals);CHKERRQ(ierr);
3498   ierr = PetscFree(mycols);CHKERRQ(ierr);
3499   ierr = PetscFree(rowners);CHKERRQ(ierr);
3500 
3501   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3502   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3503   PetscFunctionReturn(0);
3504 }
3505 
3506 #undef __FUNCT__
3507 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3508 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3509 {
3510   PetscErrorCode ierr;
3511   IS             iscol_local;
3512   PetscInt       csize;
3513 
3514   PetscFunctionBegin;
3515   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3516   if (call == MAT_REUSE_MATRIX) {
3517     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3518     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3519   } else {
3520     PetscInt cbs;
3521     ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr);
3522     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3523     ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr);
3524   }
3525   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3526   if (call == MAT_INITIAL_MATRIX) {
3527     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3528     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3529   }
3530   PetscFunctionReturn(0);
3531 }
3532 
3533 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3534 #undef __FUNCT__
3535 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3536 /*
3537     Not great since it makes two copies of the submatrix: first a SeqAIJ
3538   on each process, and then the end result obtained by concatenating the local matrices.
3539   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3540 
3541   Note: This requires a sequential iscol with all indices.
3542 */
3543 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3544 {
3545   PetscErrorCode ierr;
3546   PetscMPIInt    rank,size;
3547   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3548   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3549   PetscBool      allcolumns, colflag;
3550   Mat            M,Mreuse;
3551   MatScalar      *vwork,*aa;
3552   MPI_Comm       comm = ((PetscObject)mat)->comm;
3553   Mat_SeqAIJ     *aij;
3554 
3555 
3556   PetscFunctionBegin;
3557   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3558   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3559 
3560   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3561   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3562   if (colflag && ncol == mat->cmap->N){
3563     allcolumns = PETSC_TRUE;
3564   } else {
3565     allcolumns = PETSC_FALSE;
3566   }
3567   if (call ==  MAT_REUSE_MATRIX) {
3568     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3569     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3570     ierr  = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3571   } else {
3572     ierr   = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3573   }
3574 
3575   /*
3576       m - number of local rows
3577       n - number of columns (same on all processors)
3578       rstart - first row in new global matrix generated
3579   */
3580   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3581   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3582   if (call == MAT_INITIAL_MATRIX) {
3583     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3584     ii  = aij->i;
3585     jj  = aij->j;
3586 
3587     /*
3588         Determine the number of non-zeros in the diagonal and off-diagonal
3589         portions of the matrix in order to do correct preallocation
3590     */
3591 
3592     /* first get start and end of "diagonal" columns */
3593     if (csize == PETSC_DECIDE) {
3594       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3595       if (mglobal == n) { /* square matrix */
3596         nlocal = m;
3597       } else {
3598         nlocal = n/size + ((n % size) > rank);
3599       }
3600     } else {
3601       nlocal = csize;
3602     }
3603     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3604     rstart = rend - nlocal;
3605     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3606 
3607     /* next, compute all the lengths */
3608     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3609     olens = dlens + m;
3610     for (i=0; i<m; i++) {
3611       jend = ii[i+1] - ii[i];
3612       olen = 0;
3613       dlen = 0;
3614       for (j=0; j<jend; j++) {
3615         if (*jj < rstart || *jj >= rend) olen++;
3616         else dlen++;
3617         jj++;
3618       }
3619       olens[i] = olen;
3620       dlens[i] = dlen;
3621     }
3622     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3623     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3624     ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr);
3625     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3626     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3627     ierr = PetscFree(dlens);CHKERRQ(ierr);
3628   } else {
3629     PetscInt ml,nl;
3630 
3631     M = *newmat;
3632     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3633     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3634     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3635     /*
3636          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3637        rather than the slower MatSetValues().
3638     */
3639     M->was_assembled = PETSC_TRUE;
3640     M->assembled     = PETSC_FALSE;
3641   }
3642   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3643   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3644   ii  = aij->i;
3645   jj  = aij->j;
3646   aa  = aij->a;
3647   for (i=0; i<m; i++) {
3648     row   = rstart + i;
3649     nz    = ii[i+1] - ii[i];
3650     cwork = jj;     jj += nz;
3651     vwork = aa;     aa += nz;
3652     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3653   }
3654 
3655   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3656   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3657   *newmat = M;
3658 
3659   /* save submatrix used in processor for next request */
3660   if (call ==  MAT_INITIAL_MATRIX) {
3661     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3662     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3663   }
3664 
3665   PetscFunctionReturn(0);
3666 }
3667 
3668 EXTERN_C_BEGIN
3669 #undef __FUNCT__
3670 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3671 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3672 {
3673   PetscInt       m,cstart, cend,j,nnz,i,d;
3674   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3675   const PetscInt *JJ;
3676   PetscScalar    *values;
3677   PetscErrorCode ierr;
3678 
3679   PetscFunctionBegin;
3680   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3681 
3682   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3683   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3684   m      = B->rmap->n;
3685   cstart = B->cmap->rstart;
3686   cend   = B->cmap->rend;
3687   rstart = B->rmap->rstart;
3688 
3689   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3690 
3691 #if defined(PETSC_USE_DEBUG)
3692   for (i=0; i<m; i++) {
3693     nnz     = Ii[i+1]- Ii[i];
3694     JJ      = J + Ii[i];
3695     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3696     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3697     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3698   }
3699 #endif
3700 
3701   for (i=0; i<m; i++) {
3702     nnz     = Ii[i+1]- Ii[i];
3703     JJ      = J + Ii[i];
3704     nnz_max = PetscMax(nnz_max,nnz);
3705     d       = 0;
3706     for (j=0; j<nnz; j++) {
3707       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3708     }
3709     d_nnz[i] = d;
3710     o_nnz[i] = nnz - d;
3711   }
3712   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3713   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3714 
3715   if (v) values = (PetscScalar*)v;
3716   else {
3717     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3718     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3719   }
3720 
3721   for (i=0; i<m; i++) {
3722     ii   = i + rstart;
3723     nnz  = Ii[i+1]- Ii[i];
3724     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3725   }
3726   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3727   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3728 
3729   if (!v) {
3730     ierr = PetscFree(values);CHKERRQ(ierr);
3731   }
3732   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3733   PetscFunctionReturn(0);
3734 }
3735 EXTERN_C_END
3736 
3737 #undef __FUNCT__
3738 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3739 /*@
3740    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3741    (the default parallel PETSc format).
3742 
3743    Collective on MPI_Comm
3744 
3745    Input Parameters:
3746 +  B - the matrix
3747 .  i - the indices into j for the start of each local row (starts with zero)
3748 .  j - the column indices for each local row (starts with zero)
3749 -  v - optional values in the matrix
3750 
3751    Level: developer
3752 
3753    Notes:
3754        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3755      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3756      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3757 
3758        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3759 
3760        The format used for the sparse matrix input is equivalent to a
3761     row-major ordering, i.e., for the following matrix, the input data expected is
3762     as shown:
3763 
3764         1 0 0
3765         2 0 3     P0
3766        -------
3767         4 5 6     P1
3768 
3769      Process0 [P0]: rows_owned=[0,1]
3770         i =  {0,1,3}  [size = nrow+1  = 2+1]
3771         j =  {0,0,2}  [size = nz = 3]
3772         v =  {1,2,3}  [size = nz = 3]
3773 
3774      Process1 [P1]: rows_owned=[2]
3775         i =  {0,3}    [size = nrow+1  = 1+1]
3776         j =  {0,1,2}  [size = nz = 3]
3777         v =  {4,5,6}  [size = nz = 3]
3778 
3779 .keywords: matrix, aij, compressed row, sparse, parallel
3780 
3781 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3782           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3783 @*/
3784 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3785 {
3786   PetscErrorCode ierr;
3787 
3788   PetscFunctionBegin;
3789   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3790   PetscFunctionReturn(0);
3791 }
3792 
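/*
   Editor's note: the block below is a usage sketch, not part of the original source.  It shows
   how the 3x3 example in the manual page above could be assembled with
   MatMPIAIJSetPreallocationCSR() on exactly two MPI processes; the helper name
   BuildCSRExample() is hypothetical.
*/
#if 0
static PetscErrorCode BuildCSRExample(Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  /* local CSR arrays: process 0 owns rows 0-1, process 1 owns row 2 */
  PetscInt       i0[] = {0,1,3}, j0[] = {0,0,2};
  PetscScalar    v0[] = {1.0,2.0,3.0};
  PetscInt       i1[] = {0,3},   j1[] = {0,1,2};
  PetscScalar    v1[] = {4.0,5.0,6.0};

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_WORLD,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,rank ? 1 : 2,PETSC_DECIDE,3,3);CHKERRQ(ierr); /* 2 local rows on rank 0, 1 on rank 1 */
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  /* preallocates, inserts the values, and assembles in one call */
  if (!rank) {ierr = MatMPIAIJSetPreallocationCSR(*A,i0,j0,v0);CHKERRQ(ierr);}
  else       {ierr = MatMPIAIJSetPreallocationCSR(*A,i1,j1,v1);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
#endif
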
3793 #undef __FUNCT__
3794 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3795 /*@C
3796    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3797    (the default parallel PETSc format).  For good matrix assembly performance
3798    the user should preallocate the matrix storage by setting the parameters
3799    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3800    performance can be increased by more than a factor of 50.
3801 
3802    Collective on MPI_Comm
3803 
3804    Input Parameters:
3805 +  A - the matrix
3806 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3807            (same value is used for all local rows)
3808 .  d_nnz - array containing the number of nonzeros in the various rows of the
3809            DIAGONAL portion of the local submatrix (possibly different for each row)
3810            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3811            The size of this array is equal to the number of local rows, i.e 'm'.
3812            For matrices that will be factored, you must leave room for (and set)
3813            the diagonal entry even if it is zero.
3814 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3815            submatrix (same value is used for all local rows).
3816 -  o_nnz - array containing the number of nonzeros in the various rows of the
3817            OFF-DIAGONAL portion of the local submatrix (possibly different for
3818            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3819            structure. The size of this array is equal to the number
3820            of local rows, i.e 'm'.
3821 
3822    If the *_nnz parameter is given then the *_nz parameter is ignored
3823 
3824    The AIJ format (also called the Yale sparse matrix format or
3825    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3826    storage.  The stored row and column indices begin with zero.
3827    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3828 
3829    The parallel matrix is partitioned such that the first m0 rows belong to
3830    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3831    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3832 
3833    The DIAGONAL portion of the local submatrix of a processor can be defined
3834    as the submatrix obtained by extracting the part corresponding to
3835    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3836    first row that belongs to the processor, r2 is the last row belonging to
3837    this processor, and c1-c2 is the range of indices of the local part of a
3838    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3839    common case of a square matrix, the row and column ranges are the same and
3840    the DIAGONAL part is also square. The remaining portion of the local
3841    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3842 
3843    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3844 
3845    You can call MatGetInfo() to get information on how effective the preallocation was;
3846    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3847    You can also run with the option -info and look for messages with the string
3848    malloc in them to see if additional memory allocation was needed.
3849 
3850    Example usage:
3851 
3852    Consider the following 8x8 matrix with 34 non-zero values, that is
3853    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3854    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3855    as follows:
3856 
3857 .vb
3858             1  2  0  |  0  3  0  |  0  4
3859     Proc0   0  5  6  |  7  0  0  |  8  0
3860             9  0 10  | 11  0  0  | 12  0
3861     -------------------------------------
3862            13  0 14  | 15 16 17  |  0  0
3863     Proc1   0 18  0  | 19 20 21  |  0  0
3864             0  0  0  | 22 23  0  | 24  0
3865     -------------------------------------
3866     Proc2  25 26 27  |  0  0 28  | 29  0
3867            30  0  0  | 31 32 33  |  0 34
3868 .ve
3869 
3870    This can be represented as a collection of submatrices as:
3871 
3872 .vb
3873       A B C
3874       D E F
3875       G H I
3876 .ve
3877 
3878    Where the submatrices A,B,C are owned by proc0, D,E,F are
3879    owned by proc1, G,H,I are owned by proc2.
3880 
3881    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3882    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3883    The 'M','N' parameters are 8,8, and have the same values on all procs.
3884 
3885    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3886    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3887    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3888    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3889    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3890    matrix, and [DF] as another SeqAIJ matrix.
3891 
3892    When d_nz, o_nz parameters are specified, d_nz storage elements are
3893    allocated for every row of the local diagonal submatrix, and o_nz
3894    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3895    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3896    row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3897    In this case, the values of d_nz,o_nz are:
3898 .vb
3899      proc0 : d_nz = 2, o_nz = 2
3900      proc1 : d_nz = 3, o_nz = 2
3901      proc2 : d_nz = 1, o_nz = 4
3902 .ve
3903    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3904    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3905    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3906    34 values.
3907 
3908    When d_nnz, o_nnz parameters are specified, the storage is specified
3909    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3910    In the above case the values for d_nnz,o_nnz are:
3911 .vb
3912      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3913      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3914      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3915 .ve
3916    Here the space allocated is sum of all the above values i.e 34, and
3917    hence pre-allocation is perfect.
3918 
3919    Level: intermediate
3920 
3921 .keywords: matrix, aij, compressed row, sparse, parallel
3922 
3923 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3924           MPIAIJ, MatGetInfo()
3925 @*/
3926 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3927 {
3928   PetscErrorCode ierr;
3929 
3930   PetscFunctionBegin;
3931   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3932   PetscValidType(B,1);
3933   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3934   PetscFunctionReturn(0);
3935 }
3936 
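/*
   Editor's note: a minimal calling sketch, not part of the original source, for the
   MatCreate()/MatSetType()/MatMPIAIJSetPreallocation() sequence described above, using
   per-row d_nnz/o_nnz arrays.  The helper name PreallocateAIJExample() and the constant
   per-row counts are illustrative only; real code would compute the counts from the
   problem's stencil or connectivity.
*/
#if 0
static PetscErrorCode PreallocateAIJExample(MPI_Comm comm,PetscInt mlocal,Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       i,*d_nnz,*o_nnz;

  PetscFunctionBegin;
  ierr = PetscMalloc2(mlocal,PetscInt,&d_nnz,mlocal,PetscInt,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<mlocal; i++) {
    d_nnz[i] = PetscMin(3,mlocal); /* nonzeros expected in the DIAGONAL block of local row i */
    o_nnz[i] = 2;                  /* nonzeros expected in the OFF-DIAGONAL block of local row i */
  }
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*A,0,d_nnz,0,o_nnz);CHKERRQ(ierr); /* d_nz/o_nz ignored since *_nnz given */
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
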
3937 #undef __FUNCT__
3938 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3939 /*@
3940      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3941          CSR format the local rows.
3942 
3943    Collective on MPI_Comm
3944 
3945    Input Parameters:
3946 +  comm - MPI communicator
3947 .  m - number of local rows (Cannot be PETSC_DECIDE)
3948 .  n - This value should be the same as the local size used in creating the
3949        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3950        calculated if N is given) For square matrices n is almost always m.
3951 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3952 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3953 .   i - row indices
3954 .   j - column indices
3955 -   a - matrix values
3956 
3957    Output Parameter:
3958 .   mat - the matrix
3959 
3960    Level: intermediate
3961 
3962    Notes:
3963        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3964      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3965      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3966 
3967        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3968 
3969        The format used for the sparse matrix input is equivalent to a
3970     row-major ordering, i.e., for the following matrix, the input data expected is
3971     as shown:
3972 
3973         1 0 0
3974         2 0 3     P0
3975        -------
3976         4 5 6     P1
3977 
3978      Process0 [P0]: rows_owned=[0,1]
3979         i =  {0,1,3}  [size = nrow+1  = 2+1]
3980         j =  {0,0,2}  [size = nz = 3]
3981         v =  {1,2,3}  [size = nz = 3]
3982 
3983      Process1 [P1]: rows_owned=[2]
3984         i =  {0,3}    [size = nrow+1  = 1+1]
3985         j =  {0,1,2}  [size = nz = 3]
3986         v =  {4,5,6}  [size = nz = 3]
3987 
3988 .keywords: matrix, aij, compressed row, sparse, parallel
3989 
3990 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3991           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3992 @*/
3993 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3994 {
3995   PetscErrorCode ierr;
3996 
3997   PetscFunctionBegin;
3998   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3999   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4000   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4001   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4002   /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */
4003   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4004   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4005   PetscFunctionReturn(0);
4006 }
4007 
4008 #undef __FUNCT__
4009 #define __FUNCT__ "MatCreateAIJ"
4010 /*@C
4011    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4012    (the default parallel PETSc format).  For good matrix assembly performance
4013    the user should preallocate the matrix storage by setting the parameters
4014    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4015    performance can be increased by more than a factor of 50.
4016 
4017    Collective on MPI_Comm
4018 
4019    Input Parameters:
4020 +  comm - MPI communicator
4021 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4022            This value should be the same as the local size used in creating the
4023            y vector for the matrix-vector product y = Ax.
4024 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given);
4025        this value should be the same as the local size used in creating the
4026        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
4027 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4028 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4029 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4030            (same value is used for all local rows)
4031 .  d_nnz - array containing the number of nonzeros in the various rows of the
4032            DIAGONAL portion of the local submatrix (possibly different for each row)
4033            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4034            The size of this array is equal to the number of local rows, i.e. 'm'.
4035 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4036            submatrix (same value is used for all local rows).
4037 -  o_nnz - array containing the number of nonzeros in the various rows of the
4038            OFF-DIAGONAL portion of the local submatrix (possibly different for
4039            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4040            structure. The size of this array is equal to the number
4041            of local rows, i.e. 'm'.
4042 
4043    Output Parameter:
4044 .  A - the matrix
4045 
4046    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4047    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4048    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4049 
4050    Notes:
4051    If the *_nnz parameter is given then the *_nz parameter is ignored
4052 
4053    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4054    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4055    storage requirements for this matrix.
4056 
4057    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4058    processor then it must be used on all processors that share the object for
4059    that argument.
4060 
4061    The user MUST specify either the local or global matrix dimensions
4062    (possibly both).
4063 
4064    The parallel matrix is partitioned across processors such that the
4065    first m0 rows belong to process 0, the next m1 rows belong to
4066    process 1, the next m2 rows belong to process 2, etc., where
4067    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
4068    values corresponding to an [m x N] submatrix.
4069 
4070    The columns are logically partitioned with the n0 columns belonging
4071    to 0th partition, the next n1 columns belonging to the next
4072    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4073 
4074    The DIAGONAL portion of the local submatrix on any given processor
4075    is the submatrix corresponding to the rows and columns m,n
4076    corresponding to the given processor, i.e. the diagonal submatrix on
4077    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4078    etc. The remaining portion of the local submatrix [m x (N-n)]
4079    constitutes the OFF-DIAGONAL portion. The example below better
4080    illustrates this concept.
4081 
4082    For a square global matrix we define each processor's diagonal portion
4083    to be its local rows and the corresponding columns (a square submatrix);
4084    each processor's off-diagonal portion encompasses the remainder of the
4085    local matrix (a rectangular submatrix).
4086 
4087    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4088 
4089    When calling this routine with a single process communicator, a matrix of
4090    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4091    type of communicator, use the construction mechanism:
4092      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4093 
4094    By default, this format uses inodes (identical nodes) when possible.
4095    We search for consecutive rows with the same nonzero structure, thereby
4096    reusing matrix information to achieve increased efficiency.
4097 
4098    Options Database Keys:
4099 +  -mat_no_inode  - Do not use inodes
4100 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4101 -  -mat_aij_oneindex - Internally use indexing starting at 1
4102         rather than 0.  Note that when calling MatSetValues(),
4103         the user still MUST index entries starting at 0!
4104 
4105 
4106    Example usage:
4107 
4108    Consider the following 8x8 matrix with 34 non-zero values that is
4109    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4110    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4111    as follows:
4112 
4113 .vb
4114             1  2  0  |  0  3  0  |  0  4
4115     Proc0   0  5  6  |  7  0  0  |  8  0
4116             9  0 10  | 11  0  0  | 12  0
4117     -------------------------------------
4118            13  0 14  | 15 16 17  |  0  0
4119     Proc1   0 18  0  | 19 20 21  |  0  0
4120             0  0  0  | 22 23  0  | 24  0
4121     -------------------------------------
4122     Proc2  25 26 27  |  0  0 28  | 29  0
4123            30  0  0  | 31 32 33  |  0 34
4124 .ve
4125 
4126    This can be represented as a collection of submatrices as:
4127 
4128 .vb
4129       A B C
4130       D E F
4131       G H I
4132 .ve
4133 
4134    Where the submatrices A,B,C are owned by proc0, D,E,F are
4135    owned by proc1, G,H,I are owned by proc2.
4136 
4137    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4138    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4139    The 'M','N' parameters are 8,8, and have the same values on all procs.
4140 
4141    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4142    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4143    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4144    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4145    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4146    matrix, and [DF] as another SeqAIJ matrix.
4147 
4148    When d_nz, o_nz parameters are specified, d_nz storage elements are
4149    allocated for every row of the local diagonal submatrix, and o_nz
4150    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4151    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4152    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4153    In this case, the values of d_nz,o_nz are:
4154 .vb
4155      proc0 : d_nz = 2, o_nz = 2
4156      proc1 : d_nz = 3, o_nz = 2
4157      proc2 : d_nz = 1, o_nz = 4
4158 .ve
4159    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4160    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4161    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4162    34 values.
4163 
4164    When d_nnz, o_nnz parameters are specified, the storage is specified
4165    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4166    In the above case the values for d_nnz,o_nnz are:
4167 .vb
4168      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4169      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4170      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4171 .ve
4172    Here the space allocated is the sum of all the above values, i.e. 34, and
4173    hence pre-allocation is perfect.
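
   As an illustrative sketch only (not part of the manual page text above), proc1 in this example
   could create its share of the 8x8 matrix with the hypothetical arrays dnnz1 and onnz1 holding
   its d_nnz and o_nnz values:

.vb
     PetscInt dnnz1[] = {3,3,2};
     PetscInt onnz1[] = {2,1,1};
     Mat      A;

     ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,dnnz1,0,onnz1,&A);CHKERRQ(ierr);
.ve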
4174 
4175    Level: intermediate
4176 
4177 .keywords: matrix, aij, compressed row, sparse, parallel
4178 
4179 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4180           MPIAIJ, MatCreateMPIAIJWithArrays()
4181 @*/
4182 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4183 {
4184   PetscErrorCode ierr;
4185   PetscMPIInt    size;
4186 
4187   PetscFunctionBegin;
4188   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4189   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4190   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4191   if (size > 1) {
4192     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4193     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4194   } else {
4195     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4196     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4197   }
4198   PetscFunctionReturn(0);
4199 }
4200 
4201 #undef __FUNCT__
4202 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4203 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4204 {
4205   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4206 
4207   PetscFunctionBegin;
4208   *Ad     = a->A;
4209   *Ao     = a->B;
4210   *colmap = a->garray;
4211   PetscFunctionReturn(0);
4212 }
4213 
4214 #undef __FUNCT__
4215 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4216 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4217 {
4218   PetscErrorCode ierr;
4219   PetscInt       i;
4220   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4221 
4222   PetscFunctionBegin;
4223   if (coloring->ctype == IS_COLORING_GLOBAL) {
4224     ISColoringValue *allcolors,*colors;
4225     ISColoring      ocoloring;
4226 
4227     /* set coloring for diagonal portion */
4228     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4229 
4230     /* set coloring for off-diagonal portion */
4231     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4232     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4233     for (i=0; i<a->B->cmap->n; i++) {
4234       colors[i] = allcolors[a->garray[i]];
4235     }
4236     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4237     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4238     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4239     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4240   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4241     ISColoringValue *colors;
4242     PetscInt        *larray;
4243     ISColoring      ocoloring;
4244 
4245     /* set coloring for diagonal portion */
4246     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4247     for (i=0; i<a->A->cmap->n; i++) {
4248       larray[i] = i + A->cmap->rstart;
4249     }
4250     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4251     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4252     for (i=0; i<a->A->cmap->n; i++) {
4253       colors[i] = coloring->colors[larray[i]];
4254     }
4255     ierr = PetscFree(larray);CHKERRQ(ierr);
4256     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4257     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4258     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4259 
4260     /* set coloring for off-diagonal portion */
4261     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4262     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4263     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4264     for (i=0; i<a->B->cmap->n; i++) {
4265       colors[i] = coloring->colors[larray[i]];
4266     }
4267     ierr = PetscFree(larray);CHKERRQ(ierr);
4268     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4269     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4270     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4271   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4272 
4273   PetscFunctionReturn(0);
4274 }
4275 
4276 #if defined(PETSC_HAVE_ADIC)
4277 #undef __FUNCT__
4278 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4279 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4280 {
4281   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4282   PetscErrorCode ierr;
4283 
4284   PetscFunctionBegin;
4285   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4286   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4287   PetscFunctionReturn(0);
4288 }
4289 #endif
4290 
4291 #undef __FUNCT__
4292 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4293 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4294 {
4295   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4296   PetscErrorCode ierr;
4297 
4298   PetscFunctionBegin;
4299   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4300   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4301   PetscFunctionReturn(0);
4302 }
4303 
4304 #undef __FUNCT__
4305 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4306 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4307 {
4308   PetscErrorCode ierr;
4309   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4310   PetscInt       *indx;
4311 
4312   PetscFunctionBegin;
4313   /* This routine will ONLY return MPIAIJ type matrix */
4314   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4315   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4316   if (n == PETSC_DECIDE){
4317     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4318   }
4319   /* Check sum(n) = N */
4320   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4321   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4322 
4323   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4324   rstart -= m;
4325 
4326   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4327   for (i=0;i<m;i++) {
4328     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4329     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4330     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4331   }
4332 
4333   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4334   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr);
4335   ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr);
4336   ierr = MatSetType(*outmat,MATMPIAIJ);  CHKERRQ(ierr);
4337   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4338   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4339   PetscFunctionReturn(0);
4340 }
4341 
4342 #undef __FUNCT__
4343 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4344 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4345 {
4346   PetscErrorCode ierr;
4347   PetscInt       m,N,i,rstart,nnz,Ii;
4348   PetscInt       *indx;
4349   PetscScalar    *values;
4350 
4351   PetscFunctionBegin;
4352   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4353   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4354   for (i=0;i<m;i++) {
4355     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4356     Ii    = i + rstart;
4357     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4358     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4359   }
4360   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4361   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4362   PetscFunctionReturn(0);
4363 }
4364 
4365 #undef __FUNCT__
4366 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4367 /*@
4368       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4369                  matrices from each processor
4370 
4371     Collective on MPI_Comm
4372 
4373    Input Parameters:
4374 +    comm - the communicator the parallel matrix will live on
4375 .    inmat - the input sequential matrix (one per process)
4376 .    n - number of local columns (or PETSC_DECIDE)
4377 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4378 
4379    Output Parameter:
4380 .    outmat - the parallel matrix generated
4381 
4382     Level: advanced
4383 
4384    Notes: The number of columns of the matrix in EACH processor MUST be the same.
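
   As an illustrative sketch only (Alocal is assumed to be an existing SeqAIJ matrix on each
   process), a typical calling sequence is:

.vb
     Mat C;
     ierr = MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,Alocal,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve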
4385 
4386 @*/
4387 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4388 {
4389   PetscErrorCode ierr;
4390 
4391   PetscFunctionBegin;
4392   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4393   if (scall == MAT_INITIAL_MATRIX){
4394     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4395   }
4396   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4397   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4398   PetscFunctionReturn(0);
4399 }
4400 
4401 #undef __FUNCT__
4402 #define __FUNCT__ "MatFileSplit"
4403 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4404 {
4405   PetscErrorCode    ierr;
4406   PetscMPIInt       rank;
4407   PetscInt          m,N,i,rstart,nnz;
4408   size_t            len;
4409   const PetscInt    *indx;
4410   PetscViewer       out;
4411   char              *name;
4412   Mat               B;
4413   const PetscScalar *values;
4414 
4415   PetscFunctionBegin;
4416   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4417   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4418   /* Should this be the type of the diagonal block of A? */
4419   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4420   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4421   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4422   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4423   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4424   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4425   for (i=0;i<m;i++) {
4426     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4427     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4428     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4429   }
4430   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4431   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4432 
4433   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4434   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4435   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4436   sprintf(name,"%s.%d",outfile,rank);
4437   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4438   ierr = PetscFree(name);CHKERRQ(ierr);
4439   ierr = MatView(B,out);CHKERRQ(ierr);
4440   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4441   ierr = MatDestroy(&B);CHKERRQ(ierr);
4442   PetscFunctionReturn(0);
4443 }
4444 
4445 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4446 #undef __FUNCT__
4447 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4448 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4449 {
4450   PetscErrorCode       ierr;
4451   Mat_Merge_SeqsToMPI  *merge;
4452   PetscContainer       container;
4453 
4454   PetscFunctionBegin;
4455   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4456   if (container) {
4457     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4458     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4459     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4460     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4461     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4462     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4463     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4464     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4465     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4466     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4467     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4468     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4469     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4470     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4471     ierr = PetscFree(merge);CHKERRQ(ierr);
4472     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4473   }
4474   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4475   PetscFunctionReturn(0);
4476 }
4477 
4478 #include <../src/mat/utils/freespace.h>
4479 #include <petscbt.h>
4480 
4481 #undef __FUNCT__
4482 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4483 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4484 {
4485   PetscErrorCode       ierr;
4486   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4487   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4488   PetscMPIInt          size,rank,taga,*len_s;
4489   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4490   PetscInt             proc,m;
4491   PetscInt             **buf_ri,**buf_rj;
4492   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4493   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4494   MPI_Request          *s_waits,*r_waits;
4495   MPI_Status           *status;
4496   MatScalar            *aa=a->a;
4497   MatScalar            **abuf_r,*ba_i;
4498   Mat_Merge_SeqsToMPI  *merge;
4499   PetscContainer       container;
4500 
4501   PetscFunctionBegin;
4502   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4503 
4504   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4505   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4506 
4507   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4508   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4509 
4510   bi     = merge->bi;
4511   bj     = merge->bj;
4512   buf_ri = merge->buf_ri;
4513   buf_rj = merge->buf_rj;
4514 
4515   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4516   owners = merge->rowmap->range;
4517   len_s  = merge->len_s;
4518 
4519   /* send and recv matrix values */
4520   /*-----------------------------*/
4521   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4522   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4523 
4524   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4525   for (proc=0,k=0; proc<size; proc++){
4526     if (!len_s[proc]) continue;
4527     i = owners[proc];
4528     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4529     k++;
4530   }
4531 
4532   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4533   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4534   ierr = PetscFree(status);CHKERRQ(ierr);
4535 
4536   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4537   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4538 
4539   /* insert mat values of mpimat */
4540   /*----------------------------*/
4541   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4542   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4543 
4544   for (k=0; k<merge->nrecv; k++){
4545     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4546     nrows = *(buf_ri_k[k]);
4547     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4548     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4549   }
4550 
4551   /* set values of ba */
4552   m = merge->rowmap->n;
4553   for (i=0; i<m; i++) {
4554     arow = owners[rank] + i;
4555     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4556     bnzi = bi[i+1] - bi[i];
4557     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4558 
4559     /* add local non-zero vals of this proc's seqmat into ba */
4560     anzi = ai[arow+1] - ai[arow];
4561     aj   = a->j + ai[arow];
4562     aa   = a->a + ai[arow];
4563     nextaj = 0;
4564     for (j=0; nextaj<anzi; j++){
4565       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4566         ba_i[j] += aa[nextaj++];
4567       }
4568     }
4569 
4570     /* add received vals into ba */
4571     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4572       /* i-th row */
4573       if (i == *nextrow[k]) {
4574         anzi = *(nextai[k]+1) - *nextai[k];
4575         aj   = buf_rj[k] + *(nextai[k]);
4576         aa   = abuf_r[k] + *(nextai[k]);
4577         nextaj = 0;
4578         for (j=0; nextaj<anzi; j++){
4579           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4580             ba_i[j] += aa[nextaj++];
4581           }
4582         }
4583         nextrow[k]++; nextai[k]++;
4584       }
4585     }
4586     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4587   }
4588   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4589   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4590 
4591   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4592   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4593   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4594   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4595   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4596   PetscFunctionReturn(0);
4597 }
4598 
4599 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4600 
4601 #undef __FUNCT__
4602 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4603 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4604 {
4605   PetscErrorCode       ierr;
4606   Mat                  B_mpi;
4607   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4608   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4609   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4610   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4611   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4612   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4613   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4614   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4615   MPI_Status           *status;
4616   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4617   PetscBT              lnkbt;
4618   Mat_Merge_SeqsToMPI  *merge;
4619   PetscContainer       container;
4620 
4621   PetscFunctionBegin;
4622   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4623 
4624   /* make sure it is a PETSc comm */
4625   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4626   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4627   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4628 
4629   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4630   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4631 
4632   /* determine row ownership */
4633   /*---------------------------------------------------------*/
4634   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4635   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4636   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4637   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4638   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4639   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4640   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4641 
4642   m      = merge->rowmap->n;
4643   M      = merge->rowmap->N;
4644   owners = merge->rowmap->range;
4645 
4646   /* determine the number of messages to send, their lengths */
4647   /*---------------------------------------------------------*/
4648   len_s  = merge->len_s;
4649 
4650   len = 0;  /* length of buf_si[] */
4651   merge->nsend = 0;
4652   for (proc=0; proc<size; proc++){
4653     len_si[proc] = 0;
4654     if (proc == rank){
4655       len_s[proc] = 0;
4656     } else {
4657       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4658       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4659     }
4660     if (len_s[proc]) {
4661       merge->nsend++;
4662       nrows = 0;
4663       for (i=owners[proc]; i<owners[proc+1]; i++){
4664         if (ai[i+1] > ai[i]) nrows++;
4665       }
4666       len_si[proc] = 2*(nrows+1);
4667       len += len_si[proc];
4668     }
4669   }
4670 
4671   /* determine the number and length of messages to receive for ij-structure */
4672   /*-------------------------------------------------------------------------*/
4673   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4674   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4675 
4676   /* post the Irecv of j-structure */
4677   /*-------------------------------*/
4678   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4679   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4680 
4681   /* post the Isend of j-structure */
4682   /*--------------------------------*/
4683   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4684 
4685   for (proc=0, k=0; proc<size; proc++){
4686     if (!len_s[proc]) continue;
4687     i = owners[proc];
4688     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4689     k++;
4690   }
4691 
4692   /* receives and sends of j-structure are complete */
4693   /*------------------------------------------------*/
4694   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4695   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4696 
4697   /* send and recv i-structure */
4698   /*---------------------------*/
4699   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4700   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4701 
4702   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4703   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4704   for (proc=0,k=0; proc<size; proc++){
4705     if (!len_s[proc]) continue;
4706     /* form outgoing message for i-structure:
4707          buf_si[0]:                 nrows to be sent
4708                [1:nrows]:           row index (global)
4709                [nrows+1:2*nrows+1]: i-structure index
4710     */
4711     /*-------------------------------------------*/
4712     nrows = len_si[proc]/2 - 1;
4713     buf_si_i    = buf_si + nrows+1;
4714     buf_si[0]   = nrows;
4715     buf_si_i[0] = 0;
4716     nrows = 0;
4717     for (i=owners[proc]; i<owners[proc+1]; i++){
4718       anzi = ai[i+1] - ai[i];
4719       if (anzi) {
4720         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4721         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4722         nrows++;
4723       }
4724     }
4725     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4726     k++;
4727     buf_si += len_si[proc];
4728   }
4729 
4730   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4731   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4732 
4733   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4734   for (i=0; i<merge->nrecv; i++){
4735     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4736   }
4737 
4738   ierr = PetscFree(len_si);CHKERRQ(ierr);
4739   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4740   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4741   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4742   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4743   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4744   ierr = PetscFree(status);CHKERRQ(ierr);
4745 
4746   /* compute a local seq matrix in each processor */
4747   /*----------------------------------------------*/
4748   /* allocate bi array and free space for accumulating nonzero column info */
4749   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4750   bi[0] = 0;
4751 
4752   /* create and initialize a linked list */
4753   nlnk = N+1;
4754   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4755 
4756   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4757   len = 0;
4758   len  = ai[owners[rank+1]] - ai[owners[rank]];
4759   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4760   current_space = free_space;
4761 
4762   /* determine symbolic info for each local row */
4763   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4764 
4765   for (k=0; k<merge->nrecv; k++){
4766     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4767     nrows = *buf_ri_k[k];
4768     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4769     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4770   }
4771 
4772   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4773   len = 0;
4774   for (i=0;i<m;i++) {
4775     bnzi   = 0;
4776     /* add local non-zero cols of this proc's seqmat into lnk */
4777     arow   = owners[rank] + i;
4778     anzi   = ai[arow+1] - ai[arow];
4779     aj     = a->j + ai[arow];
4780     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4781     bnzi += nlnk;
4782     /* add received col data into lnk */
4783     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4784       if (i == *nextrow[k]) { /* i-th row */
4785         anzi = *(nextai[k]+1) - *nextai[k];
4786         aj   = buf_rj[k] + *nextai[k];
4787         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4788         bnzi += nlnk;
4789         nextrow[k]++; nextai[k]++;
4790       }
4791     }
4792     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4793 
4794     /* if free space is not available, make more free space */
4795     if (current_space->local_remaining<bnzi) {
4796       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4797       nspacedouble++;
4798     }
4799     /* copy data into free space, then initialize lnk */
4800     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4801     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4802 
4803     current_space->array           += bnzi;
4804     current_space->local_used      += bnzi;
4805     current_space->local_remaining -= bnzi;
4806 
4807     bi[i+1] = bi[i] + bnzi;
4808   }
4809 
4810   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4811 
4812   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4813   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4814   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4815 
4816   /* create symbolic parallel matrix B_mpi */
4817   /*---------------------------------------*/
4818   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4819   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4820   if (n==PETSC_DECIDE) {
4821     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4822   } else {
4823     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4824   }
4825   ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr);
4826   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4827   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4828   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4829   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4830 
4831   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4832   B_mpi->assembled     = PETSC_FALSE;
4833   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4834   merge->bi            = bi;
4835   merge->bj            = bj;
4836   merge->buf_ri        = buf_ri;
4837   merge->buf_rj        = buf_rj;
4838   merge->coi           = PETSC_NULL;
4839   merge->coj           = PETSC_NULL;
4840   merge->owners_co     = PETSC_NULL;
4841 
4842   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4843 
4844   /* attach the supporting struct to B_mpi for reuse */
4845   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4846   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4847   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4848   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4849   *mpimat = B_mpi;
4850 
4851   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4852   PetscFunctionReturn(0);
4853 }
4854 
4855 #undef __FUNCT__
4856 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4857 /*@C
4858       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4859                  matrices from each processor
4860 
4861     Collective on MPI_Comm
4862 
4863    Input Parameters:
4864 +    comm - the communicator the parallel matrix will live on
4865 .    seqmat - the input sequential matrix (one per process)
4866 .    m - number of local rows (or PETSC_DECIDE)
4867 .    n - number of local columns (or PETSC_DECIDE)
4868 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4869 
4870    Output Parameter:
4871 .    mpimat - the parallel matrix generated
4872 
4873     Level: advanced
4874 
4875    Notes:
4876      The dimensions of the sequential matrix in each processor MUST be the same.
4877      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4878      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
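
   As an illustrative sketch only (seqmat is assumed to be an existing SeqAIJ matrix of identical
   dimensions on each process), a typical calling sequence is:

.vb
     Mat C;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve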
4879 @*/
4880 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4881 {
4882   PetscErrorCode   ierr;
4883   PetscMPIInt     size;
4884 
4885   PetscFunctionBegin;
4886   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4887   if (size == 1){
4888      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4889      if (scall == MAT_INITIAL_MATRIX){
4890        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4891      } else {
4892        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4893      }
4894      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4895      PetscFunctionReturn(0);
4896   }
4897   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4898   if (scall == MAT_INITIAL_MATRIX){
4899     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4900   }
4901   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4902   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4903   PetscFunctionReturn(0);
4904 }
4905 
4906 #undef __FUNCT__
4907 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4908 /*@
4909      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4910           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4911           with MatGetSize().
4912 
4913     Not Collective
4914 
4915    Input Parameters:
4916 +    A - the matrix
4917 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4918 
4919    Output Parameter:
4920 .    A_loc - the local sequential matrix generated
4921 
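   As an illustrative sketch only (A is assumed to be an existing MPIAIJ matrix), the first call
   creates A_loc and the second call refills it after the numerical values of A have changed:

.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
.ve
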
4922     Level: developer
4923 
4924 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4925 
4926 @*/
4927 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4928 {
4929   PetscErrorCode  ierr;
4930   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4931   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4932   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4933   MatScalar       *aa=a->a,*ba=b->a,*cam;
4934   PetscScalar     *ca;
4935   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4936   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4937   PetscBool       match;
4938 
4939   PetscFunctionBegin;
4940   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4941   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4942   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4943   if (scall == MAT_INITIAL_MATRIX){
4944     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4945     ci[0] = 0;
4946     for (i=0; i<am; i++){
4947       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4948     }
4949     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4950     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4951     k = 0;
4952     for (i=0; i<am; i++) {
4953       ncols_o = bi[i+1] - bi[i];
4954       ncols_d = ai[i+1] - ai[i];
4955       /* off-diagonal portion of A */
4956       for (jo=0; jo<ncols_o; jo++) {
4957         col = cmap[*bj];
4958         if (col >= cstart) break;
4959         cj[k]   = col; bj++;
4960         ca[k++] = *ba++;
4961       }
4962       /* diagonal portion of A */
4963       for (j=0; j<ncols_d; j++) {
4964         cj[k]   = cstart + *aj++;
4965         ca[k++] = *aa++;
4966       }
4967       /* off-diagonal portion of A */
4968       for (j=jo; j<ncols_o; j++) {
4969         cj[k]   = cmap[*bj++];
4970         ca[k++] = *ba++;
4971       }
4972     }
4973     /* put together the new matrix */
4974     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4975     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4976     /* Since these are PETSc arrays, change flags to free them as necessary. */
4977     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4978     mat->free_a  = PETSC_TRUE;
4979     mat->free_ij = PETSC_TRUE;
4980     mat->nonew   = 0;
4981   } else if (scall == MAT_REUSE_MATRIX){
4982     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4983     ci = mat->i; cj = mat->j; cam = mat->a;
4984     for (i=0; i<am; i++) {
4985       /* off-diagonal portion of A */
4986       ncols_o = bi[i+1] - bi[i];
4987       for (jo=0; jo<ncols_o; jo++) {
4988         col = cmap[*bj];
4989         if (col >= cstart) break;
4990         *cam++ = *ba++; bj++;
4991       }
4992       /* diagonal portion of A */
4993       ncols_d = ai[i+1] - ai[i];
4994       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4995       /* off-diagonal portion of A */
4996       for (j=jo; j<ncols_o; j++) {
4997         *cam++ = *ba++; bj++;
4998       }
4999     }
5000   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5001   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5002   PetscFunctionReturn(0);
5003 }
5004 
5005 #undef __FUNCT__
5006 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5007 /*@C
5008      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5009 
5010     Not Collective
5011 
5012    Input Parameters:
5013 +    A - the matrix
5014 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5015 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5016 
5017    Output Parameter:
5018 .    A_loc - the local sequential matrix generated
5019 
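   As an illustrative sketch only (A is assumed to be an existing MPIAIJ matrix), passing
   PETSC_NULL for the row and column index sets extracts all local rows and all nonzero columns:

.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve
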
5020     Level: developer
5021 
5022 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5023 
5024 @*/
5025 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5026 {
5027   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5028   PetscErrorCode    ierr;
5029   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5030   IS                isrowa,iscola;
5031   Mat               *aloc;
5032   PetscBool       match;
5033 
5034   PetscFunctionBegin;
5035   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5036   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5037   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5038   if (!row){
5039     start = A->rmap->rstart; end = A->rmap->rend;
5040     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5041   } else {
5042     isrowa = *row;
5043   }
5044   if (!col){
5045     start = A->cmap->rstart;
5046     cmap  = a->garray;
5047     nzA   = a->A->cmap->n;
5048     nzB   = a->B->cmap->n;
5049     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5050     ncols = 0;
5051     for (i=0; i<nzB; i++) {
5052       if (cmap[i] < start) idx[ncols++] = cmap[i];
5053       else break;
5054     }
5055     imark = i;
5056     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5057     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5058     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5059   } else {
5060     iscola = *col;
5061   }
5062   if (scall != MAT_INITIAL_MATRIX){
5063     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5064     aloc[0] = *A_loc;
5065   }
5066   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5067   *A_loc = aloc[0];
5068   ierr = PetscFree(aloc);CHKERRQ(ierr);
5069   if (!row){
5070     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5071   }
5072   if (!col){
5073     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5074   }
5075   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5076   PetscFunctionReturn(0);
5077 }
5078 
5079 #undef __FUNCT__
5080 #define __FUNCT__ "MatGetBrowsOfAcols"
5081 /*@C
5082     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5083 
5084     Collective on Mat
5085 
5086    Input Parameters:
5087 +    A,B - the matrices in mpiaij format
5088 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5089 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5090 
5091    Output Parameter:
5092 +    rowb, colb - index sets of rows and columns of B to extract
5093 -    B_seq - the sequential matrix generated
5094 
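   As an illustrative sketch only (A and B are assumed to be existing MPIAIJ matrices with
   compatible local dimensions), the index sets created by the first call are passed back in
   when the call is repeated with MAT_REUSE_MATRIX:

.vb
     IS  rowb = PETSC_NULL,colb = PETSC_NULL;
     Mat B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
.ve
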
5095     Level: developer
5096 
5097 @*/
5098 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5099 {
5100   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5101   PetscErrorCode    ierr;
5102   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5103   IS                isrowb,iscolb;
5104   Mat               *bseq=PETSC_NULL;
5105 
5106   PetscFunctionBegin;
5107   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5108     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5109   }
5110   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5111 
5112   if (scall == MAT_INITIAL_MATRIX){
5113     start = A->cmap->rstart;
5114     cmap  = a->garray;
5115     nzA   = a->A->cmap->n;
5116     nzB   = a->B->cmap->n;
5117     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5118     ncols = 0;
5119     for (i=0; i<nzB; i++) {  /* row < local row index */
5120       if (cmap[i] < start) idx[ncols++] = cmap[i];
5121       else break;
5122     }
5123     imark = i;
5124     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5125     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5126     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5127     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5128   } else {
5129     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5130     isrowb = *rowb; iscolb = *colb;
5131     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5132     bseq[0] = *B_seq;
5133   }
5134   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5135   *B_seq = bseq[0];
5136   ierr = PetscFree(bseq);CHKERRQ(ierr);
5137   if (!rowb){
5138     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5139   } else {
5140     *rowb = isrowb;
5141   }
5142   if (!colb){
5143     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5144   } else {
5145     *colb = iscolb;
5146   }
5147   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5148   PetscFunctionReturn(0);
5149 }
5150 
5151 #undef __FUNCT__
5152 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5153 /*
5154     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5155     of the OFF-DIAGONAL portion of local A
5156 
5157     Collective on Mat
5158 
5159    Input Parameters:
5160 +    A,B - the matrices in mpiaij format
5161 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5162 
5163    Output Parameter:
5164 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5165 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5166 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5167 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5168 
5169     Level: developer
5170 
5171 */
5172 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5173 {
5174   VecScatter_MPI_General *gen_to,*gen_from;
5175   PetscErrorCode         ierr;
5176   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5177   Mat_SeqAIJ             *b_oth;
5178   VecScatter             ctx=a->Mvctx;
5179   MPI_Comm               comm=((PetscObject)ctx)->comm;
5180   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5181   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5182   PetscScalar            *rvalues,*svalues;
5183   MatScalar              *b_otha,*bufa,*bufA;
5184   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5185   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5186   MPI_Status             *sstatus,rstatus;
5187   PetscMPIInt            jj;
5188   PetscInt               *cols,sbs,rbs;
5189   PetscScalar            *vals;
5190 
5191   PetscFunctionBegin;
5192   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5193     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5194   }
5195   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5196   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5197 
5198   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5199   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5200   rvalues  = gen_from->values; /* holds the length of receiving row */
5201   svalues  = gen_to->values;   /* holds the length of sending row */
5202   nrecvs   = gen_from->n;
5203   nsends   = gen_to->n;
5204 
5205   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5206   srow     = gen_to->indices;   /* local row index to be sent */
5207   sstarts  = gen_to->starts;
5208   sprocs   = gen_to->procs;
5209   sstatus  = gen_to->sstatus;
5210   sbs      = gen_to->bs;
5211   rstarts  = gen_from->starts;
5212   rprocs   = gen_from->procs;
5213   rbs      = gen_from->bs;
5214 
5215   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5216   if (scall == MAT_INITIAL_MATRIX){
5217     /* i-array */
5218     /*---------*/
5219     /*  post receives */
5220     for (i=0; i<nrecvs; i++){
5221       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5222       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5223       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5224     }
5225 
5226     /* pack the outgoing message */
5227     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5228     sstartsj[0] = 0;  rstartsj[0] = 0;
5229     len = 0; /* total length of j or a array to be sent */
5230     k = 0;
5231     for (i=0; i<nsends; i++){
5232       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5233       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5234       for (j=0; j<nrows; j++) {
5235         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5236         for (l=0; l<sbs; l++){
5237           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5238           rowlen[j*sbs+l] = ncols;
5239           len += ncols;
5240           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5241         }
5242         k++;
5243       }
5244       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5245       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5246     }
5247     /* recvs and sends of i-array are completed */
5248     i = nrecvs;
5249     while (i--) {
5250       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5251     }
5252     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5253 
5254     /* allocate buffers for sending j and a arrays */
5255     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5256     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5257 
5258     /* create i-array of B_oth */
5259     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5260     b_othi[0] = 0;
5261     len = 0; /* total length of j or a array to be received */
5262     k = 0;
5263     for (i=0; i<nrecvs; i++){
5264       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5265       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5266       for (j=0; j<nrows; j++) {
5267         b_othi[k+1] = b_othi[k] + rowlen[j];
5268         len += rowlen[j]; k++;
5269       }
5270       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5271     }
5272 
5273     /* allocate space for j and a arrays of B_oth */
5274     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5275     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5276 
5277     /* j-array */
5278     /*---------*/
5279     /*  post receives of j-array */
5280     for (i=0; i<nrecvs; i++){
5281       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5282       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5283     }
5284 
5285     /* pack the outgoing message j-array */
5286     k = 0;
5287     for (i=0; i<nsends; i++){
5288       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5289       bufJ = bufj+sstartsj[i];
5290       for (j=0; j<nrows; j++) {
5291         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5292         for (ll=0; ll<sbs; ll++){
5293           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5294           for (l=0; l<ncols; l++){
5295             *bufJ++ = cols[l];
5296           }
5297           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5298         }
5299       }
5300       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5301     }
5302 
5303     /* recvs and sends of j-array are completed */
5304     i = nrecvs;
5305     while (i--) {
5306       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5307     }
5308     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5309   } else if (scall == MAT_REUSE_MATRIX){
5310     sstartsj = *startsj_s;
5311     rstartsj = *startsj_r;
5312     bufa     = *bufa_ptr;
5313     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5314     b_otha   = b_oth->a;
5315   } else {
5316     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "MatReuse must be either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5317   }
5318 
5319   /* a-array */
5320   /*---------*/
5321   /*  post receives of a-array */
5322   for (i=0; i<nrecvs; i++){
5323     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5324     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5325   }
5326 
5327   /* pack the outgoing message a-array */
5328   k = 0;
5329   for (i=0; i<nsends; i++){
5330     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5331     bufA = bufa+sstartsj[i];
5332     for (j=0; j<nrows; j++) {
5333       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5334       for (ll=0; ll<sbs; ll++){
5335         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5336         for (l=0; l<ncols; l++){
5337           *bufA++ = vals[l];
5338         }
5339         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5340       }
5341     }
5342     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5343   }
5344   /* recvs and sends of a-array are completed */
5345   i = nrecvs;
5346   while (i--) {
5347     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5348   }
5349   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5350   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5351 
5352   if (scall == MAT_INITIAL_MATRIX){
5353     /* put together the new matrix */
5354     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5355 
5356     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5357     /* Since these are PETSc arrays, change flags to free them as necessary. */
5358     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5359     b_oth->free_a  = PETSC_TRUE;
5360     b_oth->free_ij = PETSC_TRUE;
5361     b_oth->nonew   = 0;
5362 
5363     ierr = PetscFree(bufj);CHKERRQ(ierr);
5364     if (!startsj_s || !bufa_ptr){
5365       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5366       ierr = PetscFree(bufa);CHKERRQ(ierr);
5367     } else {
5368       *startsj_s = sstartsj;
5369       *startsj_r = rstartsj;
5370       *bufa_ptr  = bufa;
5371     }
5372   }
5373   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5374   PetscFunctionReturn(0);
5375 }
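
/*
   A minimal sketch (not part of the library) of the array-ownership idiom used in the
   MAT_INITIAL_MATRIX branch above: MatCreateSeqAIJWithArrays() normally leaves the caller
   responsible for the i/j/a arrays, but flipping free_a/free_ij hands them to the matrix so
   that MatDestroy() releases them.  The 2x2 CSR data below are made up for illustration.

     Mat            T;
     Mat_SeqAIJ     *t;
     PetscInt       *ti,*tj;
     PetscScalar    *ta;
     PetscErrorCode ierr;

     ierr = PetscMalloc(3*sizeof(PetscInt),&ti);CHKERRQ(ierr);    ti[0]=0; ti[1]=1; ti[2]=2;
     ierr = PetscMalloc(2*sizeof(PetscInt),&tj);CHKERRQ(ierr);    tj[0]=0; tj[1]=1;
     ierr = PetscMalloc(2*sizeof(PetscScalar),&ta);CHKERRQ(ierr); ta[0]=1.0; ta[1]=2.0;
     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,2,2,ti,tj,ta,&T);CHKERRQ(ierr);
     t          = (Mat_SeqAIJ*)T->data;
     t->free_a  = PETSC_TRUE;                 MatDestroy() will now free ta
     t->free_ij = PETSC_TRUE;                 MatDestroy() will now free ti and tj
     ierr = MatDestroy(&T);CHKERRQ(ierr);
*/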
5376 
5377 #undef __FUNCT__
5378 #define __FUNCT__ "MatGetCommunicationStructs"
5379 /*@C
5380   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5381 
5382   Not Collective
5383 
5384   Input Parameter:
5385 . A - The matrix in mpiaij format
5386 
5387   Output Parameters:
5388 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5389 . colmap - A map from global column index to local index into lvec
5390 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5391 
5392   Level: developer
5393 
5394 @*/
5395 #if defined (PETSC_USE_CTABLE)
5396 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5397 #else
5398 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5399 #endif
5400 {
5401   Mat_MPIAIJ *a;
5402 
5403   PetscFunctionBegin;
5404   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5405   PetscValidPointer(lvec, 2);
5406   PetscValidPointer(colmap, 3);
5407   PetscValidPointer(multScatter, 4);
5408   a = (Mat_MPIAIJ *) A->data;
5409   if (lvec) *lvec = a->lvec;
5410   if (colmap) *colmap = a->colmap;
5411   if (multScatter) *multScatter = a->Mvctx;
5412   PetscFunctionReturn(0);
5413 }
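
/*
   Usage sketch for MatGetCommunicationStructs() (variable names are illustrative):

     Vec            lvec;
     VecScatter     scatter;
   #if defined(PETSC_USE_CTABLE)
     PetscTable     colmap;
   #else
     PetscInt       *colmap;
   #endif
     PetscErrorCode ierr;

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scatter);CHKERRQ(ierr);

   The returned objects are owned by the matrix and must not be destroyed by the caller; the
   type of colmap depends on whether PETSC_USE_CTABLE is defined, matching the two prototypes
   above.
*/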
5414 
5415 EXTERN_C_BEGIN
5416 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5417 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5418 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5419 EXTERN_C_END
5420 
5421 #undef __FUNCT__
5422 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5423 /*
5424     Computes C = A*B as (B'*A')' since computing A*B directly (dense times sparse) is untenable
5425 
5426                n                       p                          p
5427         (              )       (              )         (                  )
5428       m (      A       )  *  n (       B      )   =   m (         C        )
5429         (              )       (              )         (                  )
5430 
5431 */
5432 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5433 {
5434   PetscErrorCode     ierr;
5435   Mat                At,Bt,Ct;
5436 
5437   PetscFunctionBegin;
5438   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5439   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5440   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5441   ierr = MatDestroy(&At);CHKERRQ(ierr);
5442   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5443   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5444   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5445   PetscFunctionReturn(0);
5446 }
5447 
5448 #undef __FUNCT__
5449 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5450 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5451 {
5452   PetscErrorCode ierr;
5453   PetscInt       m=A->rmap->n,n=B->cmap->n;
5454   Mat            Cmat;
5455 
5456   PetscFunctionBegin;
5457   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5458   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5459   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5460   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5461   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5462   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5463   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5464   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5465   *C   = Cmat;
5466   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5467   PetscFunctionReturn(0);
5468 }
5469 
5470 /* ----------------------------------------------------------------*/
5471 #undef __FUNCT__
5472 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5473 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5474 {
5475   PetscErrorCode ierr;
5476 
5477   PetscFunctionBegin;
5478   if (scall == MAT_INITIAL_MATRIX){
5479     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5480   }
5481   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5482   PetscFunctionReturn(0);
5483 }
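
/*
   Usage sketch (illustrative only): a MatMatMult() with A of type MATMPIDENSE and B of type
   MATMPIAIJ dispatches to the routines above, which form C = A*B as (B'*A')' because a direct
   dense-times-sparse kernel is not available.  A and B are assumed assembled with compatible
   sizes.

     Mat            C;
     PetscErrorCode ierr;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/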
5484 
5485 EXTERN_C_BEGIN
5486 #if defined(PETSC_HAVE_MUMPS)
5487 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5488 #endif
5489 #if defined(PETSC_HAVE_PASTIX)
5490 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5491 #endif
5492 #if defined(PETSC_HAVE_SUPERLU_DIST)
5493 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5494 #endif
5495 #if defined(PETSC_HAVE_SPOOLES)
5496 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5497 #endif
5498 EXTERN_C_END
5499 
5500 /*MC
5501    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5502 
5503    Options Database Keys:
5504 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5505 
5506   Level: beginner
5507 
5508 .seealso: MatCreateAIJ()
5509 M*/
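
/*
   Usage sketch for the "mpiaij" type (M and N are the user's global sizes; the preallocation
   values are illustrative):

     Mat            A;
     PetscErrorCode ierr;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   Equivalently, call MatSetFromOptions(A) and run with -mat_type mpiaij.
*/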
5510 
5511 EXTERN_C_BEGIN
5512 #undef __FUNCT__
5513 #define __FUNCT__ "MatCreate_MPIAIJ"
5514 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5515 {
5516   Mat_MPIAIJ     *b;
5517   PetscErrorCode ierr;
5518   PetscMPIInt    size;
5519 
5520   PetscFunctionBegin;
5521   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5522 
5523   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5524   B->data         = (void*)b;
5525   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5526   B->assembled    = PETSC_FALSE;
5527 
5528   B->insertmode   = NOT_SET_VALUES;
5529   b->size         = size;
5530   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5531 
5532   /* build cache for off array entries formed */
5533   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5534   b->donotstash  = PETSC_FALSE;
5535   b->colmap      = 0;
5536   b->garray      = 0;
5537   b->roworiented = PETSC_TRUE;
5538 
5539   /* stuff used for matrix vector multiply */
5540   b->lvec      = PETSC_NULL;
5541   b->Mvctx     = PETSC_NULL;
5542 
5543   /* stuff for MatGetRow() */
5544   b->rowindices   = 0;
5545   b->rowvalues    = 0;
5546   b->getrowactive = PETSC_FALSE;
5547 
5548 #if defined(PETSC_HAVE_SPOOLES)
5549   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5550                                      "MatGetFactor_mpiaij_spooles",
5551                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5552 #endif
5553 #if defined(PETSC_HAVE_MUMPS)
5554   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5555                                      "MatGetFactor_aij_mumps",
5556                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5557 #endif
5558 #if defined(PETSC_HAVE_PASTIX)
5559   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5560                                      "MatGetFactor_mpiaij_pastix",
5561                                      MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5562 #endif
5563 #if defined(PETSC_HAVE_SUPERLU_DIST)
5564   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5565                                      "MatGetFactor_mpiaij_superlu_dist",
5566                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5567 #endif
5568   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5569                                      "MatStoreValues_MPIAIJ",
5570                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5571   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5572                                      "MatRetrieveValues_MPIAIJ",
5573                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5574   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5575                                      "MatGetDiagonalBlock_MPIAIJ",
5576                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5577   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5578                                      "MatIsTranspose_MPIAIJ",
5579                                      MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5580   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5581                                      "MatMPIAIJSetPreallocation_MPIAIJ",
5582                                      MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5583   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5584                                      "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5585                                      MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5586   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5587                                      "MatDiagonalScaleLocal_MPIAIJ",
5588                                      MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5589   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5590                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5591                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5592   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5593                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5594                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5595   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5596                                      "MatConvert_MPIAIJ_MPISBAIJ",
5597                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5598   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5599                                      "MatMatMult_MPIDense_MPIAIJ",
5600                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5601   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5602                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5603                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5604   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5605                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5606                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5607   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5608   PetscFunctionReturn(0);
5609 }
5610 EXTERN_C_END
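
/*
   The PetscObjectComposeFunctionDynamic() calls above attach implementation-specific entry
   points to the matrix by name.  A minimal sketch of how such a composed function is later
   looked up and invoked (the name and signature shown match MatMPIAIJSetPreallocation_MPIAIJ;
   other composed functions are used the same way, with d_nz, d_nnz, o_nz, o_nnz supplied by
   the caller):

     PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]) = PETSC_NULL;
     PetscErrorCode ierr;
     ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
     if (f) {
       ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
     }
*/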
5611 
5612 #undef __FUNCT__
5613 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5614 /*@
5615      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5616          and "off-diagonal" part of the matrix in CSR format.
5617 
5618    Collective on MPI_Comm
5619 
5620    Input Parameters:
5621 +  comm - MPI communicator
5622 .  m - number of local rows (Cannot be PETSC_DECIDE)
5623 .  m - number of local rows (cannot be PETSC_DECIDE)
5624 .  n - This value should be the same as the local size used in creating the
5625        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5626        calculated if N is given). For square matrices n is almost always m.
5627 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5628 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5629 .   j - column indices
5630 .   a - matrix values
5631 .   oi - row indices for "off-diagonal" portion of matrix
5632 .   oj - column indices
5633 -   oa - matrix values
5634 
5635    Output Parameter:
5636 .   mat - the matrix
5637 
5638    Level: advanced
5639 
5640    Notes:
5641        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5642        must free these arrays once the matrix has been destroyed and not before.
5643 
5644        The i and j indices are 0 based
5645        The i, j, oi, and oj indices are 0 based
5646        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5647 
5648        This sets local rows and cannot be used to set off-processor values.
5649 
5650        You cannot later use MatSetValues() to change values in this matrix.
5651 
5652 .keywords: matrix, aij, compressed row, sparse, parallel
5653 
5654 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5655           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5656 @*/
5657 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5658 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5659 {
5660   PetscErrorCode ierr;
5661   Mat_MPIAIJ     *maij;
5662 
5663   PetscFunctionBegin;
5664   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5665   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5666   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5667   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5668   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5669   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5670   maij = (Mat_MPIAIJ*) (*mat)->data;
5671   maij->donotstash     = PETSC_TRUE;
5672   (*mat)->preallocated = PETSC_TRUE;
5673 
5674   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5675   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5676 
5677   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5678   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5679 
5680   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5681   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5682   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5683   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5684 
5685   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5686   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5687   PetscFunctionReturn(0);
5688 }
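
/*
   Usage sketch for MatCreateMPIAIJWithSplitArrays() on a 2-process run building a 2x2 matrix
   (one local row and one local column per process; the data shown are for rank 0 and are
   illustrative only).  The column indices in oj are given as global indices, matching the
   (*mat)->cmap->N column width used for the off-diagonal block above.  All six arrays must
   stay valid until the matrix is destroyed.

     Mat            A;
     PetscErrorCode ierr;
     PetscInt       i[2]  = {0,1},  j[1]  = {0};
     PetscScalar    a[1]  = {2.0};
     PetscInt       oi[2] = {0,1},  oj[1] = {1};
     PetscScalar    oa[1] = {1.0};

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
     only now may the i, j, a, oi, oj, and oa arrays be freed or go out of scope
*/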
5689 
5690 /*
5691     Special version for direct calls from Fortran
5692 */
5693 #include <petsc-private/fortranimpl.h>
5694 
5695 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5696 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5697 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5698 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5699 #endif
5700 
5701 /* Change these macros so they can be used in a void function */
5702 #undef CHKERRQ
5703 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5704 #undef SETERRQ2
5705 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5706 #undef SETERRQ3
5707 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5708 #undef SETERRQ
5709 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5710 
5711 EXTERN_C_BEGIN
5712 #undef __FUNCT__
5713 #define __FUNCT__ "matsetvaluesmpiaij_"
5714 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5715 {
5716   Mat             mat = *mmat;
5717   PetscInt        m = *mm, n = *mn;
5718   InsertMode      addv = *maddv;
5719   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5720   PetscScalar     value;
5721   PetscErrorCode  ierr;
5722 
5723   MatCheckPreallocated(mat,1);
5724   if (mat->insertmode == NOT_SET_VALUES) {
5725     mat->insertmode = addv;
5726   }
5727 #if defined(PETSC_USE_DEBUG)
5728   else if (mat->insertmode != addv) {
5729     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5730   }
5731 #endif
5732   {
5733   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5734   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5735   PetscBool       roworiented = aij->roworiented;
5736 
5737   /* Some variables required by the macros below */
5738   Mat             A = aij->A;
5739   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5740   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5741   MatScalar       *aa = a->a;
5742   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5743   Mat             B = aij->B;
5744   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5745   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5746   MatScalar       *ba = b->a;
5747 
5748   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5749   PetscInt        nonew = a->nonew;
5750   MatScalar       *ap1,*ap2;
5751 
5752   PetscFunctionBegin;
5753   for (i=0; i<m; i++) {
5754     if (im[i] < 0) continue;
5755 #if defined(PETSC_USE_DEBUG)
5756     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5757 #endif
5758     if (im[i] >= rstart && im[i] < rend) {
5759       row      = im[i] - rstart;
5760       lastcol1 = -1;
5761       rp1      = aj + ai[row];
5762       ap1      = aa + ai[row];
5763       rmax1    = aimax[row];
5764       nrow1    = ailen[row];
5765       low1     = 0;
5766       high1    = nrow1;
5767       lastcol2 = -1;
5768       rp2      = bj + bi[row];
5769       ap2      = ba + bi[row];
5770       rmax2    = bimax[row];
5771       nrow2    = bilen[row];
5772       low2     = 0;
5773       high2    = nrow2;
5774 
5775       for (j=0; j<n; j++) {
5776         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5777         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5778         if (in[j] >= cstart && in[j] < cend){
5779           col = in[j] - cstart;
5780           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5781         } else if (in[j] < 0) continue;
5782 #if defined(PETSC_USE_DEBUG)
5783         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5784 #endif
5785         else {
5786           if (mat->was_assembled) {
5787             if (!aij->colmap) {
5788               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5789             }
5790 #if defined (PETSC_USE_CTABLE)
5791             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5792             col--;
5793 #else
5794             col = aij->colmap[in[j]] - 1;
5795 #endif
5796             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5797               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5798               col =  in[j];
5799               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5800               B = aij->B;
5801               b = (Mat_SeqAIJ*)B->data;
5802               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5803               rp2      = bj + bi[row];
5804               ap2      = ba + bi[row];
5805               rmax2    = bimax[row];
5806               nrow2    = bilen[row];
5807               low2     = 0;
5808               high2    = nrow2;
5809               bm       = aij->B->rmap->n;
5810               ba = b->a;
5811             }
5812           } else col = in[j];
5813           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5814         }
5815       }
5816     } else {
5817       if (!aij->donotstash) {
5818         if (roworiented) {
5819           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5820         } else {
5821           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5822         }
5823       }
5824     }
5825   }}
5826   PetscFunctionReturnVoid();
5827 }
5828 EXTERN_C_END
5829 
5830