xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a6dfd86ebdf75fa024070af26d51b62fd16650a3)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to using inodes when
18    enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
23 M*/
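/*
   A minimal usage sketch for MATAIJ (illustrative only; the sizes and nonzero
   estimates below are placeholder assumptions, not taken from this file).  It
   creates an AIJ matrix and calls both preallocation routines as recommended
   above; the Seq version takes effect on one process and the MPI version on several.

      Mat            A;
      PetscErrorCode ierr;

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
      ... insert entries with MatSetValues() ...
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(&A);CHKERRQ(ierr);
*/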
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
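/*
   A minimal sketch (illustrative only) of selecting one of these formats from the
   options database instead of hard-coding the type: call MatSetFromOptions() before
   preallocation and choose the type on the command line, e.g. -mat_type aij or
   -mat_type aijcrl.  The preallocation calls are honored by either format.

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/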
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for (j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
107 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
108 {
109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
110   PetscErrorCode ierr;
111   PetscInt       i,rstart,nrows,*rows;
112 
113   PetscFunctionBegin;
114   *zrows = PETSC_NULL;
115   ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
116   ierr = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
117   for (i=0; i<nrows; i++) rows[i] += rstart;
118   ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 #undef __FUNCT__
123 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
124 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
125 {
126   PetscErrorCode ierr;
127   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
128   PetscInt       i,n,*garray = aij->garray;
129   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
130   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
131   PetscReal      *work;
132 
133   PetscFunctionBegin;
134   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
135   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
136   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
137   if (type == NORM_2) {
138     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
139       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
140     }
141     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
142       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
143     }
144   } else if (type == NORM_1) {
145     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
146       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
147     }
148     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
149       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
150     }
151   } else if (type == NORM_INFINITY) {
152     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
153       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
154     }
155     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
156       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
157     }
158 
159   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
160   if (type == NORM_INFINITY) {
161     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
162   } else {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
164   }
165   ierr = PetscFree(work);CHKERRQ(ierr);
166   if (type == NORM_2) {
167     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "MatDistribute_MPIAIJ"
174 /*
175     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
176     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
177 
178     Only for square matrices
179 */
180 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
181 {
182   PetscMPIInt    rank,size;
183   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
184   PetscErrorCode ierr;
185   Mat            mat;
186   Mat_SeqAIJ     *gmata;
187   PetscMPIInt    tag;
188   MPI_Status     status;
189   PetscBool      aij;
190   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
191 
192   PetscFunctionBegin;
193   CHKMEMQ;
194   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
195   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
196   if (!rank) {
197     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
198     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
199   }
200   if (reuse == MAT_INITIAL_MATRIX) {
201     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
202     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
203     if (!rank) {
204       bses[0] = gmat->rmap->bs;
205       bses[1] = gmat->cmap->bs;
206     }
207     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
208     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
209     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
210     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
211     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
212     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) {
215       rowners[i] += rowners[i-1];
216     }
217     rstart = rowners[rank];
218     rend   = rowners[rank+1];
219     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
220     if (!rank) {
221       gmata = (Mat_SeqAIJ*) gmat->data;
222       /* send row lengths to all processors */
223       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
224       for (i=1; i<size; i++) {
225         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
226       }
227       /* determine the number of diagonal and off-diagonal entries in each row */
228       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
229       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
230       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
231       jj = 0;
232       for (i=0; i<m; i++) {
233         for (j=0; j<dlens[i]; j++) {
234           if (gmata->j[jj] < rstart) ld[i]++;
235           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
236           jj++;
237         }
238       }
239       /* send column indices to other processes */
240       for (i=1; i<size; i++) {
241         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
242         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
243         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
244       }
245 
246       /* send numerical values to other processes */
247       for (i=1; i<size; i++) {
248         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
249         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
250       }
251       gmataa = gmata->a;
252       gmataj = gmata->j;
253 
254     } else {
255       /* receive row lengths */
256       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
257       /* receive column indices */
258       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
260       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       /* determine the number of diagonal and off-diagonal entries in each row */
262       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
263       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
264       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
265       jj = 0;
266       for (i=0; i<m; i++) {
267         for (j=0; j<dlens[i]; j++) {
268           if (gmataj[jj] < rstart) ld[i]++;
269           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
270           jj++;
271         }
272       }
273       /* receive numerical values */
274       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
275       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
276     }
277     /* set preallocation */
278     for (i=0; i<m; i++) {
279       dlens[i] -= olens[i];
280     }
281     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
282     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
283 
284     for (i=0; i<m; i++) {
285       dlens[i] += olens[i];
286     }
287     cnt  = 0;
288     for (i=0; i<m; i++) {
289       row  = rstart + i;
290       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
291       cnt += dlens[i];
292     }
293     if (rank) {
294       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
295     }
296     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
297     ierr = PetscFree(rowners);CHKERRQ(ierr);
298     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
299     *inmat = mat;
300   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
301     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
302     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
303     mat   = *inmat;
304     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
305     if (!rank) {
306       /* send numerical values to other processes */
307       gmata = (Mat_SeqAIJ*) gmat->data;
308       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
309       gmataa = gmata->a;
310       for (i=1; i<size; i++) {
311         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
312         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
313       }
314       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
315     } else {
316       /* receive numerical values from process 0*/
317       nz   = Ad->nz + Ao->nz;
318       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
319       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
320     }
321     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
322     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
323     ad = Ad->a;
324     ao = Ao->a;
325     if (mat->rmap->n) {
326       i  = 0;
327       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
328       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
329     }
330     for (i=1; i<mat->rmap->n; i++) {
331       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     i--;
335     if (mat->rmap->n) {
336       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
337     }
338     if (rank) {
339       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
340     }
341   }
342   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
344   CHKMEMQ;
345   PetscFunctionReturn(0);
346 }
347 
348 /*
349   Local utility routine that creates a mapping from the global column
350   number to the local number in the off-diagonal part of the local
351   storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
352   a slightly higher hash table cost; without it, it is not scalable (each process
353   stores an order-N integer array) but is fast to access.  A lookup sketch follows this routine.
354 */
355 #undef __FUNCT__
356 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
357 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
358 {
359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
360   PetscErrorCode ierr;
361   PetscInt       n = aij->B->cmap->n,i;
362 
363   PetscFunctionBegin;
364   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
365 #if defined (PETSC_USE_CTABLE)
366   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
367   for (i=0; i<n; i++) {
368     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
369   }
370 #else
371   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
372   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
373   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
374   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
375 #endif
376   PetscFunctionReturn(0);
377 }
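/*
   A minimal sketch (illustrative only) of how the column map built above is later
   consulted: a global column index gcol is translated into a local index into the
   off-diagonal block B, with a result of -1 meaning the column is not stored locally.
   The +1/-1 shift lets a stored value of 0 mean "absent" in the array variant.

      PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
      lcol--;
   #else
      lcol = aij->colmap[gcol] - 1;
   #endif
*/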
378 
379 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
380 { \
381     if (col <= lastcol1)  low1 = 0;     \
382     else                 high1 = nrow1; \
383     lastcol1 = col;\
384     while (high1-low1 > 5) { \
385       t = (low1+high1)/2; \
386       if (rp1[t] > col) high1 = t; \
387       else              low1  = t; \
388     } \
389       for (_i=low1; _i<high1; _i++) { \
390         if (rp1[_i] > col) break; \
391         if (rp1[_i] == col) { \
392           if (addv == ADD_VALUES) ap1[_i] += value;   \
393           else                    ap1[_i] = value; \
394           goto a_noinsert; \
395         } \
396       }  \
397       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
398       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
399       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
400       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
401       N = nrow1++ - 1; a->nz++; high1++; \
402       /* shift up all the later entries in this row */ \
403       for (ii=N; ii>=_i; ii--) { \
404         rp1[ii+1] = rp1[ii]; \
405         ap1[ii+1] = ap1[ii]; \
406       } \
407       rp1[_i] = col;  \
408       ap1[_i] = value;  \
409       a_noinsert: ; \
410       ailen[row] = nrow1; \
411 }
412 
413 
414 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
415 { \
416     if (col <= lastcol2) low2 = 0;                        \
417     else high2 = nrow2;                                   \
418     lastcol2 = col;                                       \
419     while (high2-low2 > 5) {                              \
420       t = (low2+high2)/2;                                 \
421       if (rp2[t] > col) high2 = t;                        \
422       else             low2  = t;                         \
423     }                                                     \
424     for (_i=low2; _i<high2; _i++) {                       \
425       if (rp2[_i] > col) break;                           \
426       if (rp2[_i] == col) {                               \
427         if (addv == ADD_VALUES) ap2[_i] += value;         \
428         else                    ap2[_i] = value;          \
429         goto b_noinsert;                                  \
430       }                                                   \
431     }                                                     \
432     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
433     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
434     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
435     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
436     N = nrow2++ - 1; b->nz++; high2++;                    \
437     /* shift up all the later entries in this row */      \
438     for (ii=N; ii>=_i; ii--) {                            \
439       rp2[ii+1] = rp2[ii];                                \
440       ap2[ii+1] = ap2[ii];                                \
441     }                                                     \
442     rp2[_i] = col;                                        \
443     ap2[_i] = value;                                      \
444     b_noinsert: ;                                         \
445     bilen[row] = nrow2;                                   \
446 }
447 
448 #undef __FUNCT__
449 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
450 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
451 {
452   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
453   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
454   PetscErrorCode ierr;
455   PetscInt       l,*garray = mat->garray,diag;
456 
457   PetscFunctionBegin;
458   /* code only works for square matrices A */
459 
460   /* find size of row to the left of the diagonal part */
461   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
462   row  = row - diag;
463   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
464     if (garray[b->j[b->i[row]+l]] > diag) break;
465   }
466   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
467 
468   /* diagonal part */
469   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
470 
471   /* right of diagonal part */
472   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
473   PetscFunctionReturn(0);
474 }
475 
476 #undef __FUNCT__
477 #define __FUNCT__ "MatSetValues_MPIAIJ"
478 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
479 {
480   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
481   PetscScalar    value;
482   PetscErrorCode ierr;
483   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
484   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
485   PetscBool      roworiented = aij->roworiented;
486 
487   /* Some Variables required in the macro */
488   Mat            A = aij->A;
489   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
490   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
491   MatScalar      *aa = a->a;
492   PetscBool      ignorezeroentries = a->ignorezeroentries;
493   Mat            B = aij->B;
494   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
495   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
496   MatScalar      *ba = b->a;
497 
498   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
499   PetscInt       nonew;
500   MatScalar      *ap1,*ap2;
501 
502   PetscFunctionBegin;
503   if (v) PetscValidScalarPointer(v,6);
504   for (i=0; i<m; i++) {
505     if (im[i] < 0) continue;
506 #if defined(PETSC_USE_DEBUG)
507     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
508 #endif
509     if (im[i] >= rstart && im[i] < rend) {
510       row      = im[i] - rstart;
511       lastcol1 = -1;
512       rp1      = aj + ai[row];
513       ap1      = aa + ai[row];
514       rmax1    = aimax[row];
515       nrow1    = ailen[row];
516       low1     = 0;
517       high1    = nrow1;
518       lastcol2 = -1;
519       rp2      = bj + bi[row];
520       ap2      = ba + bi[row];
521       rmax2    = bimax[row];
522       nrow2    = bilen[row];
523       low2     = 0;
524       high2    = nrow2;
525 
526       for (j=0; j<n; j++) {
527         if (v) {
528           if (roworiented) value = v[i*n+j];
529           else             value = v[i+j*m];
530         } else value = 0.0;
531         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
532         if (in[j] >= cstart && in[j] < cend) {
533           col = in[j] - cstart;
534           nonew = a->nonew;
535           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
536         } else if (in[j] < 0) continue;
537 #if defined(PETSC_USE_DEBUG)
538         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
539 #endif
540         else {
541           if (mat->was_assembled) {
542             if (!aij->colmap) {
543               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
544             }
545 #if defined (PETSC_USE_CTABLE)
546             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
547             col--;
548 #else
549             col = aij->colmap[in[j]] - 1;
550 #endif
551             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
552               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
553               col =  in[j];
554               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
555               B = aij->B;
556               b = (Mat_SeqAIJ*)B->data;
557               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
558               rp2      = bj + bi[row];
559               ap2      = ba + bi[row];
560               rmax2    = bimax[row];
561               nrow2    = bilen[row];
562               low2     = 0;
563               high2    = nrow2;
564               bm       = aij->B->rmap->n;
565               ba = b->a;
566             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
567           } else col = in[j];
568           nonew = b->nonew;
569           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
570         }
571       }
572     } else {
573       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
574       if (!aij->donotstash) {
575         mat->assembled = PETSC_FALSE;
576         if (roworiented) {
577           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
578         } else {
579           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
580         }
581       }
582     }
583   }
584   PetscFunctionReturn(0);
585 }
586 
587 #undef __FUNCT__
588 #define __FUNCT__ "MatGetValues_MPIAIJ"
589 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
590 {
591   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
592   PetscErrorCode ierr;
593   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
594   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
595 
596   PetscFunctionBegin;
597   for (i=0; i<m; i++) {
598     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
599     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
600     if (idxm[i] >= rstart && idxm[i] < rend) {
601       row = idxm[i] - rstart;
602       for (j=0; j<n; j++) {
603         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
604         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
605         if (idxn[j] >= cstart && idxn[j] < cend) {
606           col = idxn[j] - cstart;
607           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
608         } else {
609           if (!aij->colmap) {
610             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
611           }
612 #if defined (PETSC_USE_CTABLE)
613           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
614           col --;
615 #else
616           col = aij->colmap[idxn[j]] - 1;
617 #endif
618           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
619           else {
620             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
621           }
622         }
623       }
624     } else {
625       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
626     }
627   }
628   PetscFunctionReturn(0);
629 }
630 
631 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
632 
633 #undef __FUNCT__
634 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
635 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
636 {
637   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
638   PetscErrorCode ierr;
639   PetscInt       nstash,reallocs;
640   InsertMode     addv;
641 
642   PetscFunctionBegin;
643   if (aij->donotstash || mat->nooffprocentries) {
644     PetscFunctionReturn(0);
645   }
646 
647   /* make sure all processors are either in INSERTMODE or ADDMODE */
648   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
649   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
650   mat->insertmode = addv; /* in case this processor had no cache */
651 
652   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
653   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
654   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
655   PetscFunctionReturn(0);
656 }
657 
658 #undef __FUNCT__
659 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
660 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
661 {
662   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
663   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
664   PetscErrorCode ierr;
665   PetscMPIInt    n;
666   PetscInt       i,j,rstart,ncols,flg;
667   PetscInt       *row,*col;
668   PetscBool      other_disassembled;
669   PetscScalar    *val;
670   InsertMode     addv = mat->insertmode;
671 
672   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
673   PetscFunctionBegin;
674   if (!aij->donotstash && !mat->nooffprocentries) {
675     while (1) {
676       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
677       if (!flg) break;
678 
679       for (i=0; i<n;) {
680         /* Now identify the consecutive vals belonging to the same row */
681         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
682         if (j < n) ncols = j-i;
683         else       ncols = n-i;
684         /* Now assemble all these values with a single function call */
685         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
686         i = j;
687       }
688     }
689     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
690   }
691   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
692   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
693 
694   /* determine if any processor has disassembled; if so, we must
695      also disassemble ourselves, in order that we may reassemble. */
696   /*
697      if nonzero structure of submatrix B cannot change then we know that
698      no processor disassembled, thus we can skip this stuff
699   */
700   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
701     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
702     if (mat->was_assembled && !other_disassembled) {
703       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
704     }
705   }
706   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
707     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
708   }
709   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
710   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
711   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
712   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
713 
714   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
715   aij->rowvalues = 0;
716 
717   /* used by MatAXPY() */
718   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
719   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
720 
721   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
722   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
723   PetscFunctionReturn(0);
724 }
725 
726 #undef __FUNCT__
727 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
728 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
729 {
730   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
731   PetscErrorCode ierr;
732 
733   PetscFunctionBegin;
734   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
735   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
736   PetscFunctionReturn(0);
737 }
738 
739 #undef __FUNCT__
740 #define __FUNCT__ "MatZeroRows_MPIAIJ"
741 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
742 {
743   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
744   PetscErrorCode    ierr;
745   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
746   PetscInt          i,*owners = A->rmap->range;
747   PetscInt          *nprocs,j,idx,nsends,row;
748   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
749   PetscInt          *rvalues,count,base,slen,*source;
750   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
751   MPI_Comm          comm = ((PetscObject)A)->comm;
752   MPI_Request       *send_waits,*recv_waits;
753   MPI_Status        recv_status,*send_status;
754   const PetscScalar *xx;
755   PetscScalar       *bb;
756 #if defined(PETSC_DEBUG)
757   PetscBool      found = PETSC_FALSE;
758 #endif
759 
760   PetscFunctionBegin;
761   /*  first count number of contributors to each processor */
762   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
763   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
764   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
765   j = 0;
766   for (i=0; i<N; i++) {
767     if (lastidx > (idx = rows[i])) j = 0;
768     lastidx = idx;
769     for (; j<size; j++) {
770       if (idx >= owners[j] && idx < owners[j+1]) {
771         nprocs[2*j]++;
772         nprocs[2*j+1] = 1;
773         owner[i] = j;
774 #if defined(PETSC_DEBUG)
775         found = PETSC_TRUE;
776 #endif
777         break;
778       }
779     }
780 #if defined(PETSC_DEBUG)
781     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
782     found = PETSC_FALSE;
783 #endif
784   }
785   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
786 
787   if (A->nooffproczerorows) {
788     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
789     nrecvs = nsends;
790     nmax   = N;
791   } else {
792     /* inform other processors of number of messages and max length*/
793     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
794   }
795 
796   /* post receives:   */
797   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
798   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
799   for (i=0; i<nrecvs; i++) {
800     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
801   }
802 
803   /* do sends:
804       1) starts[i] gives the starting index in svalues for stuff going to
805          the ith processor
806   */
807   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
808   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
809   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
810   starts[0] = 0;
811   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
812   for (i=0; i<N; i++) {
813     svalues[starts[owner[i]]++] = rows[i];
814   }
815 
816   starts[0] = 0;
817   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
818   count = 0;
819   for (i=0; i<size; i++) {
820     if (nprocs[2*i+1]) {
821       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
822     }
823   }
824   ierr = PetscFree(starts);CHKERRQ(ierr);
825 
826   base = owners[rank];
827 
828   /*  wait on receives */
829   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
830   count  = nrecvs; slen = 0;
831   while (count) {
832     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
833     /* unpack receives into our local space */
834     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
835     source[imdex]  = recv_status.MPI_SOURCE;
836     lens[imdex]    = n;
837     slen          += n;
838     count--;
839   }
840   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
841 
842   /* move the data into the send scatter */
843   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
844   count = 0;
845   for (i=0; i<nrecvs; i++) {
846     values = rvalues + i*nmax;
847     for (j=0; j<lens[i]; j++) {
848       lrows[count++] = values[j] - base;
849     }
850   }
851   ierr = PetscFree(rvalues);CHKERRQ(ierr);
852   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
853   ierr = PetscFree(owner);CHKERRQ(ierr);
854   ierr = PetscFree(nprocs);CHKERRQ(ierr);
855 
856   /* fix right hand side if needed */
857   if (x && b) {
858     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
859     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
860     for (i=0; i<slen; i++) {
861       bb[lrows[i]] = diag*xx[lrows[i]];
862     }
863     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
864     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
865   }
866   /*
867         Zero the required rows. If the "diagonal block" of the matrix
868      is square and the user wishes to set the diagonal, we use separate
869      code so that MatSetValues() is not called for each diagonal entry, which would
870      allocate new memory, triggering lots of mallocs and slowing things down.
871 
872   */
873   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
874   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
875   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
876     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
877   } else if (diag != 0.0) {
878     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
879     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
880       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
881 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
882     }
883     for (i = 0; i < slen; i++) {
884       row  = lrows[i] + rstart;
885       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
886     }
887     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
888     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
889   } else {
890     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
891   }
892   ierr = PetscFree(lrows);CHKERRQ(ierr);
893 
894   /* wait on sends */
895   if (nsends) {
896     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
897     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
898     ierr = PetscFree(send_status);CHKERRQ(ierr);
899   }
900   ierr = PetscFree(send_waits);CHKERRQ(ierr);
901   ierr = PetscFree(svalues);CHKERRQ(ierr);
902   PetscFunctionReturn(0);
903 }
904 
905 #undef __FUNCT__
906 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
907 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
908 {
909   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
910   PetscErrorCode    ierr;
911   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
912   PetscInt          i,*owners = A->rmap->range;
913   PetscInt          *nprocs,j,idx,nsends;
914   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
915   PetscInt          *rvalues,count,base,slen,*source;
916   PetscInt          *lens,*lrows,*values,m;
917   MPI_Comm          comm = ((PetscObject)A)->comm;
918   MPI_Request       *send_waits,*recv_waits;
919   MPI_Status        recv_status,*send_status;
920   const PetscScalar *xx;
921   PetscScalar       *bb,*mask;
922   Vec               xmask,lmask;
923   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
924   const PetscInt    *aj, *ii,*ridx;
925   PetscScalar       *aa;
926 #if defined(PETSC_DEBUG)
927   PetscBool         found = PETSC_FALSE;
928 #endif
929 
930   PetscFunctionBegin;
931   /*  first count number of contributors to each processor */
932   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
933   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
934   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
935   j = 0;
936   for (i=0; i<N; i++) {
937     if (lastidx > (idx = rows[i])) j = 0;
938     lastidx = idx;
939     for (; j<size; j++) {
940       if (idx >= owners[j] && idx < owners[j+1]) {
941         nprocs[2*j]++;
942         nprocs[2*j+1] = 1;
943         owner[i] = j;
944 #if defined(PETSC_DEBUG)
945         found = PETSC_TRUE;
946 #endif
947         break;
948       }
949     }
950 #if defined(PETSC_DEBUG)
951     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
952     found = PETSC_FALSE;
953 #endif
954   }
955   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
956 
957   /* inform other processors of number of messages and max length*/
958   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
959 
960   /* post receives:   */
961   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
962   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
963   for (i=0; i<nrecvs; i++) {
964     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
965   }
966 
967   /* do sends:
968       1) starts[i] gives the starting index in svalues for stuff going to
969          the ith processor
970   */
971   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
972   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
973   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
974   starts[0] = 0;
975   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
976   for (i=0; i<N; i++) {
977     svalues[starts[owner[i]]++] = rows[i];
978   }
979 
980   starts[0] = 0;
981   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
982   count = 0;
983   for (i=0; i<size; i++) {
984     if (nprocs[2*i+1]) {
985       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
986     }
987   }
988   ierr = PetscFree(starts);CHKERRQ(ierr);
989 
990   base = owners[rank];
991 
992   /*  wait on receives */
993   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
994   count  = nrecvs; slen = 0;
995   while (count) {
996     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
997     /* unpack receives into our local space */
998     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
999     source[imdex]  = recv_status.MPI_SOURCE;
1000     lens[imdex]    = n;
1001     slen          += n;
1002     count--;
1003   }
1004   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1005 
1006   /* move the data into the send scatter */
1007   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1008   count = 0;
1009   for (i=0; i<nrecvs; i++) {
1010     values = rvalues + i*nmax;
1011     for (j=0; j<lens[i]; j++) {
1012       lrows[count++] = values[j] - base;
1013     }
1014   }
1015   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1016   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1017   ierr = PetscFree(owner);CHKERRQ(ierr);
1018   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1019   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1020 
1021   /* zero diagonal part of matrix */
1022   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1023 
1024   /* handle off diagonal part of matrix */
1025   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
1026   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1027   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1028   for (i=0; i<slen; i++) {
1029     bb[lrows[i]] = 1;
1030   }
1031   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1032   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1035   if (x) {
1036     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1037     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1038     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1039     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1040   }
1041   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1042 
1043   /* remove zeroed rows of off diagonal matrix */
1044   ii = aij->i;
1045   for (i=0; i<slen; i++) {
1046     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1047   }
1048 
1049   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1050   if (aij->compressedrow.use) {
1051     m    = aij->compressedrow.nrows;
1052     ii   = aij->compressedrow.i;
1053     ridx = aij->compressedrow.rindex;
1054     for (i=0; i<m; i++) {
1055       n   = ii[i+1] - ii[i];
1056       aj  = aij->j + ii[i];
1057       aa  = aij->a + ii[i];
1058 
1059       for (j=0; j<n; j++) {
1060         if (PetscAbsScalar(mask[*aj])) {
1061           if (b) bb[*ridx] -= *aa*xx[*aj];
1062           *aa        = 0.0;
1063         }
1064         aa++;
1065         aj++;
1066       }
1067       ridx++;
1068     }
1069   } else { /* do not use compressed row format */
1070     m = l->B->rmap->n;
1071     for (i=0; i<m; i++) {
1072       n   = ii[i+1] - ii[i];
1073       aj  = aij->j + ii[i];
1074       aa  = aij->a + ii[i];
1075       for (j=0; j<n; j++) {
1076         if (PetscAbsScalar(mask[*aj])) {
1077           if (b) bb[i] -= *aa*xx[*aj];
1078           *aa    = 0.0;
1079         }
1080         aa++;
1081         aj++;
1082       }
1083     }
1084   }
1085   if (x) {
1086     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1087     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1088   }
1089   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1090   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1091   ierr = PetscFree(lrows);CHKERRQ(ierr);
1092 
1093   /* wait on sends */
1094   if (nsends) {
1095     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1096     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1097     ierr = PetscFree(send_status);CHKERRQ(ierr);
1098   }
1099   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1100   ierr = PetscFree(svalues);CHKERRQ(ierr);
1101 
1102   PetscFunctionReturn(0);
1103 }
1104 
1105 #undef __FUNCT__
1106 #define __FUNCT__ "MatMult_MPIAIJ"
1107 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1108 {
1109   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1110   PetscErrorCode ierr;
1111   PetscInt       nt;
1112 
1113   PetscFunctionBegin;
1114   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1115   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1116   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1117   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1118   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1119   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1125 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1126 {
1127   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1128   PetscErrorCode ierr;
1129 
1130   PetscFunctionBegin;
1131   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 #undef __FUNCT__
1136 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1137 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1138 {
1139   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1140   PetscErrorCode ierr;
1141 
1142   PetscFunctionBegin;
1143   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1144   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1145   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1146   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1152 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1153 {
1154   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1155   PetscErrorCode ierr;
1156   PetscBool      merged;
1157 
1158   PetscFunctionBegin;
1159   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1160   /* do nondiagonal part */
1161   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1162   if (!merged) {
1163     /* send it on its way */
1164     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165     /* do local part */
1166     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1167     /* receive remote parts: note this assumes the values are not actually */
1168     /* added into yy until the next line */
1169     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1170   } else {
1171     /* do local part */
1172     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1173     /* send it on its way */
1174     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1175     /* values actually were received in the Begin() but we need to call this nop */
1176     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1177   }
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 EXTERN_C_BEGIN
1182 #undef __FUNCT__
1183 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1184 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1185 {
1186   MPI_Comm       comm;
1187   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1188   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1189   IS             Me,Notme;
1190   PetscErrorCode ierr;
1191   PetscInt       M,N,first,last,*notme,i;
1192   PetscMPIInt    size;
1193 
1194   PetscFunctionBegin;
1195 
1196   /* Easy test: symmetric diagonal block */
1197   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1198   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1199   if (!*f) PetscFunctionReturn(0);
1200   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1201   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1202   if (size == 1) PetscFunctionReturn(0);
1203 
1204   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1205   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1206   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1207   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1208   for (i=0; i<first; i++) notme[i] = i;
1209   for (i=last; i<M; i++) notme[i-last+first] = i;
1210   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1211   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1212   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1213   Aoff = Aoffs[0];
1214   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1215   Boff = Boffs[0];
1216   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1217   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1218   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1219   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1220   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1221   ierr = PetscFree(notme);CHKERRQ(ierr);
1222   PetscFunctionReturn(0);
1223 }
1224 EXTERN_C_END
1225 
1226 #undef __FUNCT__
1227 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1228 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1229 {
1230   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1231   PetscErrorCode ierr;
1232 
1233   PetscFunctionBegin;
1234   /* do nondiagonal part */
1235   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1236   /* send it on its way */
1237   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1238   /* do local part */
1239   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1240   /* receive remote parts */
1241   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 /*
1246   This only works correctly for square matrices where the subblock A->A is the
1247    diagonal block
1248 */
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1251 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1252 {
1253   PetscErrorCode ierr;
1254   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1255 
1256   PetscFunctionBegin;
1257   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1258   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1259   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatScale_MPIAIJ"
1265 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1266 {
1267   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1268   PetscErrorCode ierr;
1269 
1270   PetscFunctionBegin;
1271   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1272   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatDestroy_MPIAIJ"
1278 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1279 {
1280   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1281   PetscErrorCode ierr;
1282 
1283   PetscFunctionBegin;
1284 #if defined(PETSC_USE_LOG)
1285   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1286 #endif
1287   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1288   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1289   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1290   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1291 #if defined (PETSC_USE_CTABLE)
1292   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1293 #else
1294   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1295 #endif
1296   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1297   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1298   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1299   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1300   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1301   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1302 
1303   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1307   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1308   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1309   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1310   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1311   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1312   PetscFunctionReturn(0);
1313 }
1314 
1315 #undef __FUNCT__
1316 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1317 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1318 {
1319   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1320   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1321   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1322   PetscErrorCode    ierr;
1323   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1324   int               fd;
1325   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1326   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1327   PetscScalar       *column_values;
1328   PetscInt          message_count,flowcontrolcount;
1329   FILE              *file;
1330 
1331   PetscFunctionBegin;
1332   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1333   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1334   nz   = A->nz + B->nz;
1335   if (!rank) {
1336     header[0] = MAT_FILE_CLASSID;
1337     header[1] = mat->rmap->N;
1338     header[2] = mat->cmap->N;
1339     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1340     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1341     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1342     /* get largest number of rows any processor has */
1343     rlen = mat->rmap->n;
1344     range = mat->rmap->range;
1345     for (i=1; i<size; i++) {
1346       rlen = PetscMax(rlen,range[i+1] - range[i]);
1347     }
1348   } else {
1349     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1350     rlen = mat->rmap->n;
1351   }
1352 
1353   /* load up the local row counts */
1354   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1355   for (i=0; i<mat->rmap->n; i++) {
1356     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1357   }
1358 
1359   /* store the row lengths to the file */
1360   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1361   if (!rank) {
1362     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1363     for (i=1; i<size; i++) {
1364       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1365       rlen = range[i+1] - range[i];
1366       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1367       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1368     }
1369     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1370   } else {
1371     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1372     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1373     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1374   }
1375   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1376 
1377   /* load up the local column indices */
1378   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
1379   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1380   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1381   cnt  = 0;
1382   for (i=0; i<mat->rmap->n; i++) {
1383     for (j=B->i[i]; j<B->i[i+1]; j++) {
1384       if ((col = garray[B->j[j]]) > cstart) break;
1385       column_indices[cnt++] = col;
1386     }
1387     for (k=A->i[i]; k<A->i[i+1]; k++) {
1388       column_indices[cnt++] = A->j[k] + cstart;
1389     }
1390     for (; j<B->i[i+1]; j++) {
1391       column_indices[cnt++] = garray[B->j[j]];
1392     }
1393   }
1394   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1395 
1396   /* store the column indices to the file */
1397   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1398   if (!rank) {
1399     MPI_Status status;
1400     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1401     for (i=1; i<size; i++) {
1402       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1403       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1404       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1405       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1406       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1407     }
1408     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1409   } else {
1410     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1411     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1412     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1413     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1414   }
1415   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1416 
1417   /* load up the local column values */
1418   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1419   cnt  = 0;
1420   for (i=0; i<mat->rmap->n; i++) {
1421     for (j=B->i[i]; j<B->i[i+1]; j++) {
1422       if (garray[B->j[j]] > cstart) break;
1423       column_values[cnt++] = B->a[j];
1424     }
1425     for (k=A->i[i]; k<A->i[i+1]; k++) {
1426       column_values[cnt++] = A->a[k];
1427     }
1428     for (; j<B->i[i+1]; j++) {
1429       column_values[cnt++] = B->a[j];
1430     }
1431   }
1432   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1433 
1434   /* store the column values to the file */
1435   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1436   if (!rank) {
1437     MPI_Status status;
1438     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1439     for (i=1; i<size; i++) {
1440       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1441       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1442       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1443       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1444       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1445     }
1446     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1447   } else {
1448     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1449     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1450     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1451     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1452   }
1453   ierr = PetscFree(column_values);CHKERRQ(ierr);
1454 
1455   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1456   if (file) {
1457     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1458   }
1459 
1460   PetscFunctionReturn(0);
1461 }
1462 
1463 #undef __FUNCT__
1464 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
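/*
   Handles the ASCII, draw, and socket cases of MatView() for MPIAIJ; the binary
   case is forwarded to MatView_MPIAIJ_Binary() on more than one process.  The
   ASCII info formats print per-process statistics only, while the remaining
   cases assemble the whole matrix onto process 0 as a temporary MATMPIAIJ
   (refused above 1024 rows unless -mat_ascii_output_large is set) and view
   that copy through the singleton viewer.
*/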
1465 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1466 {
1467   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1468   PetscErrorCode    ierr;
1469   PetscMPIInt       rank = aij->rank,size = aij->size;
1470   PetscBool         isdraw,iascii,isbinary;
1471   PetscViewer       sviewer;
1472   PetscViewerFormat format;
1473 
1474   PetscFunctionBegin;
1475   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1476   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1477   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1478   if (iascii) {
1479     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1480     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1481       MatInfo    info;
1482       PetscInt   *inodes;
1483 
1484       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1485       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1486       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,&inodes,PETSC_NULL);CHKERRQ(ierr);
1487       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1488       if (!inodes) {
1489         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1490                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1491       } else {
1492         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1493                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1494       }
1495       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1496       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1497       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1498       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1499       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1500       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1501       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1502       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1503       PetscFunctionReturn(0);
1504     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1505       PetscInt   inodecount,inodelimit,*inodes;
1506       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1507       if (inodes) {
1508         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1509       } else {
1510         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1511       }
1512       PetscFunctionReturn(0);
1513     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1514       PetscFunctionReturn(0);
1515     }
1516   } else if (isbinary) {
1517     if (size == 1) {
1518       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1519       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1520     } else {
1521       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1522     }
1523     PetscFunctionReturn(0);
1524   } else if (isdraw) {
1525     PetscDraw  draw;
1526     PetscBool  isnull;
1527     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1528     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1529   }
1530 
1531   if (size == 1) {
1532     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1533     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1534   } else {
1535     /* assemble the entire matrix onto first processor. */
1536     Mat         A;
1537     Mat_SeqAIJ  *Aloc;
1538     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1539     MatScalar   *a;
1540 
1541     if (mat->rmap->N > 1024) {
1542       PetscBool  flg = PETSC_FALSE;
1543 
1544       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1545       if (!flg) {
1546         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1547       }
1548     }
1549 
1550     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1551     if (!rank) {
1552       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1553     } else {
1554       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1555     }
1556     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1557     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1558     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1559     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1560     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1561 
1562     /* copy over the A part */
1563     Aloc = (Mat_SeqAIJ*)aij->A->data;
1564     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1565     row = mat->rmap->rstart;
1566     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1567     for (i=0; i<m; i++) {
1568       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1569       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1570     }
1571     aj = Aloc->j;
1572     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1573 
1574     /* copy over the B part */
1575     Aloc = (Mat_SeqAIJ*)aij->B->data;
1576     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1577     row  = mat->rmap->rstart;
1578     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1579     ct   = cols;
1580     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1581     for (i=0; i<m; i++) {
1582       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1583       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1584     }
1585     ierr = PetscFree(ct);CHKERRQ(ierr);
1586     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1587     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1588     /*
1589        Everyone has to call to draw the matrix since the graphics waits are
1590        synchronized across all processors that share the PetscDraw object
1591     */
1592     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1593     if (!rank) {
1594       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1595       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1596       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1597       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1598     }
1599     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1600     ierr = MatDestroy(&A);CHKERRQ(ierr);
1601   }
1602   PetscFunctionReturn(0);
1603 }
1604 
1605 #undef __FUNCT__
1606 #define __FUNCT__ "MatView_MPIAIJ"
1607 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1608 {
1609   PetscErrorCode ierr;
1610   PetscBool      iascii,isdraw,issocket,isbinary;
1611 
1612   PetscFunctionBegin;
1613   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1614   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1615   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1616   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1617   if (iascii || isdraw || isbinary || issocket) {
1618     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1619   } else {
1620     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1621   }
1622   PetscFunctionReturn(0);
1623 }
1624 
1625 #undef __FUNCT__
1626 #define __FUNCT__ "MatSOR_MPIAIJ"
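/*
   Local (processor-block) SOR: each outer iteration scatters the current
   solution into mat->lvec, forms the updated right hand side
   bb1 = bb - B*lvec with the off-diagonal block, and then runs the sequential
   SOR kernel of the diagonal block A on bb1.  SOR_EISENSTAT is handled in a
   separate branch; a true parallel sweep in the global ordering is not
   supported and raises an error.

   A minimal calling sketch (assuming an assembled MPIAIJ matrix and conforming
   vectors; not taken from this file):

     ierr = MatSOR(mat,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
*/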
1627 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1628 {
1629   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1630   PetscErrorCode ierr;
1631   Vec            bb1 = 0;
1632   PetscBool      hasop;
1633 
1634   PetscFunctionBegin;
1635   if (flag == SOR_APPLY_UPPER) {
1636     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1637     PetscFunctionReturn(0);
1638   }
1639 
1640   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1641     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1642   }
1643 
1644   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1645     if (flag & SOR_ZERO_INITIAL_GUESS) {
1646       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1647       its--;
1648     }
1649 
1650     while (its--) {
1651       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1652       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1653 
1654       /* update rhs: bb1 = bb - B*x */
1655       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1656       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1657 
1658       /* local sweep */
1659       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1660     }
1661   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1662     if (flag & SOR_ZERO_INITIAL_GUESS) {
1663       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1664       its--;
1665     }
1666     while (its--) {
1667       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1668       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1669 
1670       /* update rhs: bb1 = bb - B*x */
1671       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1672       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1673 
1674       /* local sweep */
1675       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1676     }
1677   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1678     if (flag & SOR_ZERO_INITIAL_GUESS) {
1679       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1680       its--;
1681     }
1682     while (its--) {
1683       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1684       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1685 
1686       /* update rhs: bb1 = bb - B*x */
1687       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1688       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1689 
1690       /* local sweep */
1691       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1692     }
1693   } else if (flag & SOR_EISENSTAT) {
1694     Vec         xx1;
1695 
1696     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1697     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1698 
1699     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1700     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1701     if (!mat->diag) {
1702       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1703       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1704     }
1705     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1706     if (hasop) {
1707       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1708     } else {
1709       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1710     }
1711     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1712 
1713     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1714 
1715     /* local sweep */
1716     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1717     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1718     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1719   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1720 
1721   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1722   PetscFunctionReturn(0);
1723 }
1724 
1725 #undef __FUNCT__
1726 #define __FUNCT__ "MatPermute_MPIAIJ"
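/*
   Permutes the rows and columns of a parallel AIJ matrix.  The permutations are
   inverted with PetscSF reductions to find the destination of every locally
   owned row and column (and, via a broadcast, of every ghost column), the
   destination nonzero counts are broadcast back to give an exact
   preallocation, and the entries are finally inserted with MatSetValues().

   Reached through the public interface, e.g. (a sketch; rowp/colp are assumed
   to be application-provided index sets):

     Mat B;
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
*/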
1727 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1728 {
1729   Mat            aA,aB,Aperm;
1730   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1731   PetscScalar    *aa,*ba;
1732   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1733   PetscSF        rowsf,sf;
1734   IS             parcolp = PETSC_NULL;
1735   PetscBool      done;
1736   PetscErrorCode ierr;
1737 
1738   PetscFunctionBegin;
1739   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1740   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1741   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1742   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1743 
1744   /* Invert row permutation to find out where my rows should go */
1745   ierr = PetscSFCreate(((PetscObject)A)->comm,&rowsf);CHKERRQ(ierr);
1746   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,PETSC_NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1747   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1748   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1749   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1750   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1751 
1752   /* Invert column permutation to find out where my columns should go */
1753   ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
1754   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,PETSC_NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1755   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1756   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1757   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1758   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1759   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1760 
1761   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1762   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1763   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1764 
1765   /* Find out where my gcols should go */
1766   ierr = MatGetSize(aB,PETSC_NULL,&ng);CHKERRQ(ierr);
1767   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1768   ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
1769   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,PETSC_NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1770   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1771   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1772   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1773   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1774 
1775   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1776   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1777   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1778   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1779   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1780   for (i=0; i<m; i++) {
1781     PetscInt row = rdest[i],rowner;
1782     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1783     for (j=ai[i]; j<ai[i+1]; j++) {
1784       PetscInt cowner,col = cdest[aj[j]];
1785       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1786       if (rowner == cowner) dnnz[i]++;
1787       else onnz[i]++;
1788     }
1789     for (j=bi[i]; j<bi[i+1]; j++) {
1790       PetscInt cowner,col = gcdest[bj[j]];
1791       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1792       if (rowner == cowner) dnnz[i]++;
1793       else onnz[i]++;
1794     }
1795   }
1796   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1797   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1798   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1799   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1800   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1801   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1802   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1803 
1804   ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1805   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1806   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1807   for (i=0; i<m; i++) {
1808     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1809     PetscInt rowlen;
1810     rowlen = ai[i+1] - ai[i];
1811     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1812     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1813     rowlen = bi[i+1] - bi[i];
1814     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1815     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1816   }
1817   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1818   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1819   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1820   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1821   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1822   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1823   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1824   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1825   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1826   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1827   *B = Aperm;
1828   PetscFunctionReturn(0);
1829 }
1830 
1831 #undef __FUNCT__
1832 #define __FUNCT__ "MatGetInfo_MPIAIJ"
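/*
   Adds the MatInfo of the diagonal (A) and off-diagonal (B) blocks; for
   MAT_GLOBAL_MAX and MAT_GLOBAL_SUM the five accumulated fields are combined
   across the communicator with a single MPI_Allreduce().
*/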
1833 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1834 {
1835   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1836   Mat            A = mat->A,B = mat->B;
1837   PetscErrorCode ierr;
1838   PetscReal      isend[5],irecv[5];
1839 
1840   PetscFunctionBegin;
1841   info->block_size     = 1.0;
1842   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1843   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1844   isend[3] = info->memory;  isend[4] = info->mallocs;
1845   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1846   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1847   isend[3] += info->memory;  isend[4] += info->mallocs;
1848   if (flag == MAT_LOCAL) {
1849     info->nz_used      = isend[0];
1850     info->nz_allocated = isend[1];
1851     info->nz_unneeded  = isend[2];
1852     info->memory       = isend[3];
1853     info->mallocs      = isend[4];
1854   } else if (flag == MAT_GLOBAL_MAX) {
1855     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1856     info->nz_used      = irecv[0];
1857     info->nz_allocated = irecv[1];
1858     info->nz_unneeded  = irecv[2];
1859     info->memory       = irecv[3];
1860     info->mallocs      = irecv[4];
1861   } else if (flag == MAT_GLOBAL_SUM) {
1862     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1863     info->nz_used      = irecv[0];
1864     info->nz_allocated = irecv[1];
1865     info->nz_unneeded  = irecv[2];
1866     info->memory       = irecv[3];
1867     info->mallocs      = irecv[4];
1868   }
1869   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1870   info->fill_ratio_needed = 0;
1871   info->factor_mallocs    = 0;
1872   PetscFunctionReturn(0);
1873 }
1874 
1875 #undef __FUNCT__
1876 #define __FUNCT__ "MatSetOption_MPIAIJ"
1877 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1878 {
1879   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1880   PetscErrorCode ierr;
1881 
1882   PetscFunctionBegin;
1883   switch (op) {
1884   case MAT_NEW_NONZERO_LOCATIONS:
1885   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1886   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1887   case MAT_KEEP_NONZERO_PATTERN:
1888   case MAT_NEW_NONZERO_LOCATION_ERR:
1889   case MAT_USE_INODES:
1890   case MAT_IGNORE_ZERO_ENTRIES:
1891     MatCheckPreallocated(A,1);
1892     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1893     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1894     break;
1895   case MAT_ROW_ORIENTED:
1896     a->roworiented = flg;
1897     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1898     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1899     break;
1900   case MAT_NEW_DIAGONALS:
1901     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1902     break;
1903   case MAT_IGNORE_OFF_PROC_ENTRIES:
1904     a->donotstash = flg;
1905     break;
1906   case MAT_SPD:
1907     A->spd_set                         = PETSC_TRUE;
1908     A->spd                             = flg;
1909     if (flg) {
1910       A->symmetric                     = PETSC_TRUE;
1911       A->structurally_symmetric        = PETSC_TRUE;
1912       A->symmetric_set                 = PETSC_TRUE;
1913       A->structurally_symmetric_set    = PETSC_TRUE;
1914     }
1915     break;
1916   case MAT_SYMMETRIC:
1917     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1918     break;
1919   case MAT_STRUCTURALLY_SYMMETRIC:
1920     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1921     break;
1922   case MAT_HERMITIAN:
1923     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1924     break;
1925   case MAT_SYMMETRY_ETERNAL:
1926     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1927     break;
1928   default:
1929     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1930   }
1931   PetscFunctionReturn(0);
1932 }
1933 
1934 #undef __FUNCT__
1935 #define __FUNCT__ "MatGetRow_MPIAIJ"
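/*
   Returns one locally owned row in global numbering.  The row is fetched from
   the sequential getrow of the diagonal (A) and off-diagonal (B) blocks and
   merged so the column indices stay sorted: B entries whose global column
   (through garray) lies before the diagonal block, then the A entries shifted
   by cstart, then the remaining B entries.  Scratch space sized for the
   longest local row is allocated on first use and released in
   MatDestroy_MPIAIJ().
*/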
1936 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1937 {
1938   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1939   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1940   PetscErrorCode ierr;
1941   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1942   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1943   PetscInt       *cmap,*idx_p;
1944 
1945   PetscFunctionBegin;
1946   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1947   mat->getrowactive = PETSC_TRUE;
1948 
1949   if (!mat->rowvalues && (idx || v)) {
1950     /*
1951         allocate enough space to hold information from the longest row.
1952     */
1953     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1954     PetscInt   max = 1,tmp;
1955     for (i=0; i<matin->rmap->n; i++) {
1956       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1957       if (max < tmp) { max = tmp; }
1958     }
1959     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1960   }
1961 
1962   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1963   lrow = row - rstart;
1964 
1965   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1966   if (!v)   {pvA = 0; pvB = 0;}
1967   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1968   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1969   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1970   nztot = nzA + nzB;
1971 
1972   cmap  = mat->garray;
1973   if (v  || idx) {
1974     if (nztot) {
1975       /* Sort by increasing column numbers, assuming A and B already sorted */
1976       PetscInt imark = -1;
1977       if (v) {
1978         *v = v_p = mat->rowvalues;
1979         for (i=0; i<nzB; i++) {
1980           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1981           else break;
1982         }
1983         imark = i;
1984         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1985         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1986       }
1987       if (idx) {
1988         *idx = idx_p = mat->rowindices;
1989         if (imark > -1) {
1990           for (i=0; i<imark; i++) {
1991             idx_p[i] = cmap[cworkB[i]];
1992           }
1993         } else {
1994           for (i=0; i<nzB; i++) {
1995             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1996             else break;
1997           }
1998           imark = i;
1999         }
2000         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
2001         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
2002       }
2003     } else {
2004       if (idx) *idx = 0;
2005       if (v)   *v   = 0;
2006     }
2007   }
2008   *nz = nztot;
2009   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
2010   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
2011   PetscFunctionReturn(0);
2012 }
2013 
2014 #undef __FUNCT__
2015 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
2016 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2017 {
2018   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2019 
2020   PetscFunctionBegin;
2021   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2022   aij->getrowactive = PETSC_FALSE;
2023   PetscFunctionReturn(0);
2024 }
2025 
2026 #undef __FUNCT__
2027 #define __FUNCT__ "MatNorm_MPIAIJ"
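/*
   Supports NORM_FROBENIUS, NORM_1 (largest column sum), and NORM_INFINITY
   (largest row sum).  Each process accumulates over its diagonal and
   off-diagonal blocks and the results are combined with MPI_Allreduce();
   NORM_2 is not available for this format.  A sketch of the caller side (not
   from this file):

     PetscReal nrm;
     ierr = MatNorm(mat,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/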
2028 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2029 {
2030   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2031   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2032   PetscErrorCode ierr;
2033   PetscInt       i,j,cstart = mat->cmap->rstart;
2034   PetscReal      sum = 0.0;
2035   MatScalar      *v;
2036 
2037   PetscFunctionBegin;
2038   if (aij->size == 1) {
2039     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2040   } else {
2041     if (type == NORM_FROBENIUS) {
2042       v = amat->a;
2043       for (i=0; i<amat->nz; i++) {
2044         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2045       }
2046       v = bmat->a;
2047       for (i=0; i<bmat->nz; i++) {
2048         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2049       }
2050       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2051       *norm = PetscSqrtReal(*norm);
2052     } else if (type == NORM_1) { /* max column norm */
2053       PetscReal *tmp,*tmp2;
2054       PetscInt  *jj,*garray = aij->garray;
2055       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2056       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2057       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2058       *norm = 0.0;
2059       v = amat->a; jj = amat->j;
2060       for (j=0; j<amat->nz; j++) {
2061         tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
2062       }
2063       v = bmat->a; jj = bmat->j;
2064       for (j=0; j<bmat->nz; j++) {
2065         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2066       }
2067       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2068       for (j=0; j<mat->cmap->N; j++) {
2069         if (tmp2[j] > *norm) *norm = tmp2[j];
2070       }
2071       ierr = PetscFree(tmp);CHKERRQ(ierr);
2072       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2073     } else if (type == NORM_INFINITY) { /* max row norm */
2074       PetscReal ntemp = 0.0;
2075       for (j=0; j<aij->A->rmap->n; j++) {
2076         v = amat->a + amat->i[j];
2077         sum = 0.0;
2078         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2079           sum += PetscAbsScalar(*v); v++;
2080         }
2081         v = bmat->a + bmat->i[j];
2082         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2083           sum += PetscAbsScalar(*v); v++;
2084         }
2085         if (sum > ntemp) ntemp = sum;
2086       }
2087       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2088     } else {
2089       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2090     }
2091   }
2092   PetscFunctionReturn(0);
2093 }
2094 
2095 #undef __FUNCT__
2096 #define __FUNCT__ "MatTranspose_MPIAIJ"
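/*
   Forms the explicit transpose.  For MAT_INITIAL_MATRIX the preallocation is
   computed first: column counts of the diagonal block give d_nnz directly,
   while the counts of the compressed off-diagonal columns are reduced onto
   their owning processes with a PetscSF to obtain o_nnz.  The entries are then
   inserted column-wise with MatSetValues() (note the swapped row/column
   arguments) and assembled; for an in-place transpose the result is merged
   back into A with MatHeaderMerge().
*/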
2097 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2098 {
2099   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2100   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2101   PetscErrorCode ierr;
2102   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2103   PetscInt       cstart=A->cmap->rstart,ncol;
2104   Mat            B;
2105   MatScalar      *array;
2106 
2107   PetscFunctionBegin;
2108   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2109 
2110   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2111   ai = Aloc->i; aj = Aloc->j;
2112   bi = Bloc->i; bj = Bloc->j;
2113   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2114     PetscInt *d_nnz,*g_nnz,*o_nnz;
2115     PetscSFNode *oloc;
2116     PETSC_UNUSED PetscSF sf;
2117 
2118     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2119     /* compute d_nnz for preallocation */
2120     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2121     for (i=0; i<ai[ma]; i++) {
2122       d_nnz[aj[i]] ++;
2123       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2124     }
2125     /* compute local off-diagonal contributions */
2126     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2127     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2128     /* map those to global */
2129     ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
2130     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,PETSC_NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2131     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2132     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2133     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2134     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2135     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2136 
2137     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2138     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2139     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2140     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2141     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2142     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2143   } else {
2144     B = *matout;
2145     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2146     for (i=0; i<ai[ma]; i++) {
2147       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2148     }
2149   }
2150 
2151   /* copy over the A part */
2152   array = Aloc->a;
2153   row = A->rmap->rstart;
2154   for (i=0; i<ma; i++) {
2155     ncol = ai[i+1]-ai[i];
2156     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2157     row++; array += ncol; aj += ncol;
2158   }
2159   aj = Aloc->j;
2160   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
2161 
2162   /* copy over the B part */
2163   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2164   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2165   array = Bloc->a;
2166   row = A->rmap->rstart;
2167   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2168   cols_tmp = cols;
2169   for (i=0; i<mb; i++) {
2170     ncol = bi[i+1]-bi[i];
2171     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2172     row++; array += ncol; cols_tmp += ncol;
2173   }
2174   ierr = PetscFree(cols);CHKERRQ(ierr);
2175 
2176   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2177   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2178   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2179     *matout = B;
2180   } else {
2181     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2182   }
2183   PetscFunctionReturn(0);
2184 }
2185 
2186 #undef __FUNCT__
2187 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
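/*
   Computes diag(ll)*mat*diag(rr).  The scatter of rr into the ghost vector is
   started first so that left-scaling the off-diagonal block and scaling the
   diagonal block overlap the communication; the off-diagonal block is
   right-scaled only after the scatter has completed.
*/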
2188 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2189 {
2190   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2191   Mat            a = aij->A,b = aij->B;
2192   PetscErrorCode ierr;
2193   PetscInt       s1,s2,s3;
2194 
2195   PetscFunctionBegin;
2196   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2197   if (rr) {
2198     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2199     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2200     /* Overlap communication with computation. */
2201     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2202   }
2203   if (ll) {
2204     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2205     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2206     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2207   }
2208   /* scale the diagonal block */
2209   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2210 
2211   if (rr) {
2212     /* Do a scatter end and then right scale the off-diagonal block */
2213     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2214     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2215   }
2216 
2217   PetscFunctionReturn(0);
2218 }
2219 
2220 #undef __FUNCT__
2221 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2222 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2223 {
2224   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2225   PetscErrorCode ierr;
2226 
2227   PetscFunctionBegin;
2228   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2229   PetscFunctionReturn(0);
2230 }
2231 
2232 #undef __FUNCT__
2233 #define __FUNCT__ "MatEqual_MPIAIJ"
2234 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2235 {
2236   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2237   Mat            a,b,c,d;
2238   PetscBool      flg;
2239   PetscErrorCode ierr;
2240 
2241   PetscFunctionBegin;
2242   a = matA->A; b = matA->B;
2243   c = matB->A; d = matB->B;
2244 
2245   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2246   if (flg) {
2247     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2248   }
2249   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2250   PetscFunctionReturn(0);
2251 }
2252 
2253 #undef __FUNCT__
2254 #define __FUNCT__ "MatCopy_MPIAIJ"
2255 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2256 {
2257   PetscErrorCode ierr;
2258   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2259   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2260 
2261   PetscFunctionBegin;
2262   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2263   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2264     /* because of the column compression in the off-processor part of the matrix a->B,
2265        the number of columns in a->B and b->B may be different, hence we cannot call
2266        the MatCopy() directly on the two parts. If need be, we can provide a more
2267        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2268        then copying the submatrices */
2269     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2270   } else {
2271     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2272     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2273   }
2274   PetscFunctionReturn(0);
2275 }
2276 
2277 #undef __FUNCT__
2278 #define __FUNCT__ "MatSetUp_MPIAIJ"
2279 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2280 {
2281   PetscErrorCode ierr;
2282 
2283   PetscFunctionBegin;
2284   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2285   PetscFunctionReturn(0);
2286 }
2287 
2288 #undef __FUNCT__
2289 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2290 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
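/* For example, if a row of Y has the global columns yltog[yj] = {1,4} and the
   same row of X has xltog[xj] = {1,3}, the merged row has the columns {1,3,4}
   and nnz[] for that row is 3. */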
2291 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2292 {
2293   PetscInt          i,m=Y->rmap->N;
2294   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2295   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2296   const PetscInt    *xi = x->i,*yi = y->i;
2297 
2298   PetscFunctionBegin;
2299   /* Set the number of nonzeros in the new matrix */
2300   for (i=0; i<m; i++) {
2301     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2302     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2303     nnz[i] = 0;
2304     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2305       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2306       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2307       nnz[i]++;
2308     }
2309     for (; k<nzy; k++) nnz[i]++;
2310   }
2311   PetscFunctionReturn(0);
2312 }
2313 
2314 #undef __FUNCT__
2315 #define __FUNCT__ "MatAXPY_MPIAIJ"
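/*
   Y = Y + a*X.  With SAME_NONZERO_PATTERN both blocks are updated directly
   with BLAS axpy; with SUBSET_NONZERO_PATTERN an x-to-y index map for the
   off-diagonal block is built once and cached on Y's off-diagonal block so
   X's entries can be scattered into Y; otherwise a matrix with the merged
   nonzero pattern is preallocated, filled by MatAXPY_BasicWithPreallocation(),
   and swapped into Y's header.
*/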
2316 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2317 {
2318   PetscErrorCode ierr;
2319   PetscInt       i;
2320   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2321   PetscBLASInt   bnz,one=1;
2322   Mat_SeqAIJ     *x,*y;
2323 
2324   PetscFunctionBegin;
2325   if (str == SAME_NONZERO_PATTERN) {
2326     PetscScalar alpha = a;
2327     x = (Mat_SeqAIJ *)xx->A->data;
2328     y = (Mat_SeqAIJ *)yy->A->data;
2329     bnz = PetscBLASIntCast(x->nz);
2330     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2331     x = (Mat_SeqAIJ *)xx->B->data;
2332     y = (Mat_SeqAIJ *)yy->B->data;
2333     bnz = PetscBLASIntCast(x->nz);
2334     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2335   } else if (str == SUBSET_NONZERO_PATTERN) {
2336     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2337 
2338     x = (Mat_SeqAIJ *)xx->B->data;
2339     y = (Mat_SeqAIJ *)yy->B->data;
2340     if (y->xtoy && y->XtoY != xx->B) {
2341       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2342       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2343     }
2344     if (!y->xtoy) { /* get xtoy */
2345       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2346       y->XtoY = xx->B;
2347       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2348     }
2349     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2350   } else {
2351     Mat B;
2352     PetscInt *nnz_d,*nnz_o;
2353     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2354     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2355     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2356     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2357     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2358     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2359     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2360     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2361     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2362     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2363     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2364     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2365     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2366     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2367   }
2368   PetscFunctionReturn(0);
2369 }
2370 
2371 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2372 
2373 #undef __FUNCT__
2374 #define __FUNCT__ "MatConjugate_MPIAIJ"
2375 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2376 {
2377 #if defined(PETSC_USE_COMPLEX)
2378   PetscErrorCode ierr;
2379   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2380 
2381   PetscFunctionBegin;
2382   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2383   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2384 #else
2385   PetscFunctionBegin;
2386 #endif
2387   PetscFunctionReturn(0);
2388 }
2389 
2390 #undef __FUNCT__
2391 #define __FUNCT__ "MatRealPart_MPIAIJ"
2392 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2393 {
2394   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2395   PetscErrorCode ierr;
2396 
2397   PetscFunctionBegin;
2398   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2399   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2400   PetscFunctionReturn(0);
2401 }
2402 
2403 #undef __FUNCT__
2404 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2405 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2406 {
2407   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2408   PetscErrorCode ierr;
2409 
2410   PetscFunctionBegin;
2411   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2412   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2413   PetscFunctionReturn(0);
2414 }
2415 
2416 #if defined(PETSC_HAVE_PBGL)
2417 
2418 #include <boost/parallel/mpi/bsp_process_group.hpp>
2419 #include <boost/graph/distributed/ilu_default_graph.hpp>
2420 #include <boost/graph/distributed/ilu_0_block.hpp>
2421 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2422 #include <boost/graph/distributed/petsc/interface.hpp>
2423 #include <boost/multi_array.hpp>
2424 #include <boost/parallel/distributed_property_map.hpp>
2425 
2426 #undef __FUNCT__
2427 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2428 /*
2429   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2430 */
2431 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2432 {
2433   namespace petsc = boost::distributed::petsc;
2434 
2435   namespace graph_dist = boost::graph::distributed;
2436   using boost::graph::distributed::ilu_default::process_group_type;
2437   using boost::graph::ilu_permuted;
2438 
2439   PetscBool       row_identity, col_identity;
2440   PetscContainer  c;
2441   PetscInt        m, n, M, N;
2442   PetscErrorCode  ierr;
2443 
2444   PetscFunctionBegin;
2445   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2446   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2447   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2448   if (!row_identity || !col_identity) {
2449     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2450   }
2451 
2452   process_group_type pg;
2453   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2454   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2455   lgraph_type&   level_graph = *lgraph_p;
2456   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2457 
2458   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2459   ilu_permuted(level_graph);
2460 
2461   /* put together the new matrix */
2462   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2463   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2464   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2465   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2466   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2467   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2468   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2469   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2470 
2471   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2472   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2473   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2474   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2475   PetscFunctionReturn(0);
2476 }
2477 
2478 #undef __FUNCT__
2479 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2480 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2481 {
2482   PetscFunctionBegin;
2483   PetscFunctionReturn(0);
2484 }
2485 
2486 #undef __FUNCT__
2487 #define __FUNCT__ "MatSolve_MPIAIJ"
2488 /*
2489   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2490 */
2491 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2492 {
2493   namespace graph_dist = boost::graph::distributed;
2494 
2495   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2496   lgraph_type*   lgraph_p;
2497   PetscContainer c;
2498   PetscErrorCode ierr;
2499 
2500   PetscFunctionBegin;
2501   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2502   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2503   ierr = VecCopy(b, x);CHKERRQ(ierr);
2504 
2505   PetscScalar* array_x;
2506   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2507   PetscInt sx;
2508   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2509 
2510   PetscScalar* array_b;
2511   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2512   PetscInt sb;
2513   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2514 
2515   lgraph_type&   level_graph = *lgraph_p;
2516   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2517 
2518   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2519   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2520                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2521 
2522   typedef boost::iterator_property_map<array_ref_type::iterator,
2523                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2524   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2525                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2526 
2527   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2528 
2529   PetscFunctionReturn(0);
2530 }
2531 #endif
2532 
2533 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2534   PetscInt       nzlocal,nsends,nrecvs;
2535   PetscMPIInt    *send_rank,*recv_rank;
2536   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2537   PetscScalar    *sbuf_a,**rbuf_a;
2538   PetscErrorCode (*Destroy)(Mat);
2539 } Mat_Redundant;
2540 
2541 #undef __FUNCT__
2542 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2543 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2544 {
2545   PetscErrorCode       ierr;
2546   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2547   PetscInt             i;
2548 
2549   PetscFunctionBegin;
2550   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2551   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2552   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2553   for (i=0; i<redund->nrecvs; i++) {
2554     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2555     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2556   }
2557   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2558   ierr = PetscFree(redund);CHKERRQ(ierr);
2559   PetscFunctionReturn(0);
2560 }
2561 
2562 #undef __FUNCT__
2563 #define __FUNCT__ "MatDestroy_MatRedundant"
2564 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2565 {
2566   PetscErrorCode  ierr;
2567   PetscContainer  container;
2568   Mat_Redundant   *redund=PETSC_NULL;
2569 
2570   PetscFunctionBegin;
2571   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2572   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2573   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2574   A->ops->destroy = redund->Destroy;
2575   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2576   if (A->ops->destroy) {
2577     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2578   }
2579   PetscFunctionReturn(0);
2580 }
2581 
2582 #undef __FUNCT__
2583 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2584 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2585 {
2586   PetscMPIInt    rank,size;
2587   MPI_Comm       comm=((PetscObject)mat)->comm;
2588   PetscErrorCode ierr;
2589   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2590   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2591   PetscInt       *rowrange=mat->rmap->range;
2592   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2593   Mat            A=aij->A,B=aij->B,C=*matredundant;
2594   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2595   PetscScalar    *sbuf_a;
2596   PetscInt       nzlocal=a->nz+b->nz;
2597   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2598   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2599   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2600   MatScalar      *aworkA,*aworkB;
2601   PetscScalar    *vals;
2602   PetscMPIInt    tag1,tag2,tag3,imdex;
2603   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2604                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2605   MPI_Status     recv_status,*send_status;
2606   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2607   PetscInt       **rbuf_j=PETSC_NULL;
2608   PetscScalar    **rbuf_a=PETSC_NULL;
2609   Mat_Redundant  *redund=PETSC_NULL;
2610   PetscContainer container;
2611 
2612   PetscFunctionBegin;
2613   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2614   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2615 
2616   if (reuse == MAT_REUSE_MATRIX) {
2617     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2618     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2619     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2620     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2621     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2622     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2623     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2624     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2625 
2626     nsends    = redund->nsends;
2627     nrecvs    = redund->nrecvs;
2628     send_rank = redund->send_rank;
2629     recv_rank = redund->recv_rank;
2630     sbuf_nz   = redund->sbuf_nz;
2631     rbuf_nz   = redund->rbuf_nz;
2632     sbuf_j    = redund->sbuf_j;
2633     sbuf_a    = redund->sbuf_a;
2634     rbuf_j    = redund->rbuf_j;
2635     rbuf_a    = redund->rbuf_a;
2636   }
2637 
2638   if (reuse == MAT_INITIAL_MATRIX) {
2639     PetscMPIInt  subrank,subsize;
2640     PetscInt     nleftover,np_subcomm;
2641     /* get the destination processors' id send_rank, nsends and nrecvs */
2642     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2643     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2644     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2645     np_subcomm = size/nsubcomm;
2646     nleftover  = size - nsubcomm*np_subcomm;
2647     nsends = 0; nrecvs = 0;
2648     for (i=0; i<size; i++) { /* i=rank*/
2649       if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */
2650         send_rank[nsends] = i; nsends++;
2651         recv_rank[nrecvs++] = i;
2652       }
2653     }
2654     if (rank >= size - nleftover) {/* this proc is a leftover processor */
2655       i = size-nleftover-1;
2656       j = 0;
2657       while (j < nsubcomm - nleftover) {
2658         send_rank[nsends++] = i;
2659         i--; j++;
2660       }
2661     }
2662 
2663     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2664       for (i=0; i<nleftover; i++) {
2665         recv_rank[nrecvs++] = size-nleftover+i;
2666       }
2667     }
2668 
2669     /* allocate sbuf_j, sbuf_a */
2670     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2671     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2672     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2673   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2674 
2675   /* copy mat's local entries into the buffers */
2676   if (reuse == MAT_INITIAL_MATRIX) {
2677     rownz_max = 0;
2678     rptr = sbuf_j;
2679     cols = sbuf_j + rend-rstart + 1;
2680     vals = sbuf_a;
2681     rptr[0] = 0;
2682     for (i=0; i<rend-rstart; i++) {
2683       row = i + rstart;
2684       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2685       ncols  = nzA + nzB;
2686       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2687       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2688       /* load the column indices for this row into cols */
2689       lwrite = 0;
2690       for (l=0; l<nzB; l++) {
2691         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2692           vals[lwrite]   = aworkB[l];
2693           cols[lwrite++] = ctmp;
2694         }
2695       }
2696       for (l=0; l<nzA; l++) {
2697         vals[lwrite]   = aworkA[l];
2698         cols[lwrite++] = cstart + cworkA[l];
2699       }
2700       for (l=0; l<nzB; l++) {
2701         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2702           vals[lwrite]   = aworkB[l];
2703           cols[lwrite++] = ctmp;
2704         }
2705       }
2706       vals += ncols;
2707       cols += ncols;
2708       rptr[i+1] = rptr[i] + ncols;
2709       if (rownz_max < ncols) rownz_max = ncols;
2710     }
2711     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2712   } else { /* only copy matrix values into sbuf_a */
2713     rptr = sbuf_j;
2714     vals = sbuf_a;
2715     rptr[0] = 0;
2716     for (i=0; i<rend-rstart; i++) {
2717       row = i + rstart;
2718       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2719       ncols  = nzA + nzB;
2720       cworkB = b->j + b->i[i];
2721       aworkA = a->a + a->i[i];
2722       aworkB = b->a + b->i[i];
2723       lwrite = 0;
2724       for (l=0; l<nzB; l++) {
2725         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2726       }
2727       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2728       for (l=0; l<nzB; l++) {
2729         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2730       }
2731       vals += ncols;
2732       rptr[i+1] = rptr[i] + ncols;
2733     }
2734   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2735 
2736   /* send nzlocal to others, and recv others' nzlocal */
2737   /*--------------------------------------------------*/
2738   if (reuse == MAT_INITIAL_MATRIX) {
2739     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2740     s_waits2 = s_waits3 + nsends;
2741     s_waits1 = s_waits2 + nsends;
2742     r_waits1 = s_waits1 + nsends;
2743     r_waits2 = r_waits1 + nrecvs;
2744     r_waits3 = r_waits2 + nrecvs;
2745   } else {
2746     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2747     r_waits3 = s_waits3 + nsends;
2748   }
2749 
2750   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2751   if (reuse == MAT_INITIAL_MATRIX) {
2752     /* get new tags to keep the communication clean */
2753     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2754     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2755     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2756 
2757     /* post receives of other's nzlocal */
2758     for (i=0; i<nrecvs; i++) {
2759       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2760     }
2761     /* send nzlocal to others */
2762     for (i=0; i<nsends; i++) {
2763       sbuf_nz[i] = nzlocal;
2764       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2765     }
2766     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2767     count = nrecvs;
2768     while (count) {
2769       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2770       recv_rank[imdex] = recv_status.MPI_SOURCE;
2771       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2772       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2773 
2774       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2775       rbuf_nz[imdex] += i + 2;
2776       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2777       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2778       count--;
2779     }
2780     /* wait on sends of nzlocal */
2781     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2782     /* send mat->i,j to others, and recv from others */
2783     /*------------------------------------------------*/
2784     for (i=0; i<nsends; i++) {
2785       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2786       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2787     }
2788     /* wait on receives of mat->i,j */
2789     /*------------------------------*/
2790     count = nrecvs;
2791     while (count) {
2792       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2793       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2794       count--;
2795     }
2796     /* wait on sends of mat->i,j */
2797     /*---------------------------*/
2798     if (nsends) {
2799       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2800     }
2801   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2802 
2803   /* post receives, send and receive mat->a */
2804   /*----------------------------------------*/
2805   for (imdex=0; imdex<nrecvs; imdex++) {
2806     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2807   }
2808   for (i=0; i<nsends; i++) {
2809     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2810   }
2811   count = nrecvs;
2812   while (count) {
2813     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2814     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2815     count--;
2816   }
2817   if (nsends) {
2818     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2819   }
2820 
2821   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2822 
2823   /* create redundant matrix */
2824   /*-------------------------*/
2825   if (reuse == MAT_INITIAL_MATRIX) {
2826     /* compute rownz_max for preallocation */
2827     for (imdex=0; imdex<nrecvs; imdex++) {
2828       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2829       rptr = rbuf_j[imdex];
2830       for (i=0; i<j; i++) {
2831         ncols = rptr[i+1] - rptr[i];
2832         if (rownz_max < ncols) rownz_max = ncols;
2833       }
2834     }
2835 
2836     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2837     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2838     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2839     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2840     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2841     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2842   } else {
2843     C = *matredundant;
2844   }
2845 
2846   /* insert local matrix entries */
2847   rptr = sbuf_j;
2848   cols = sbuf_j + rend-rstart + 1;
2849   vals = sbuf_a;
2850   for (i=0; i<rend-rstart; i++) {
2851     row   = i + rstart;
2852     ncols = rptr[i+1] - rptr[i];
2853     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2854     vals += ncols;
2855     cols += ncols;
2856   }
2857   /* insert received matrix entries */
2858   for (imdex=0; imdex<nrecvs; imdex++) {
2859     rstart = rowrange[recv_rank[imdex]];
2860     rend   = rowrange[recv_rank[imdex]+1];
2861     rptr = rbuf_j[imdex];
2862     cols = rbuf_j[imdex] + rend-rstart + 1;
2863     vals = rbuf_a[imdex];
2864     for (i=0; i<rend-rstart; i++) {
2865       row   = i + rstart;
2866       ncols = rptr[i+1] - rptr[i];
2867       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2868       vals += ncols;
2869       cols += ncols;
2870     }
2871   }
2872   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2873   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2874   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2875   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2876   if (reuse == MAT_INITIAL_MATRIX) {
2877     PetscContainer container;
2878     *matredundant = C;
2879     /* create a supporting struct and attach it to C for reuse */
2880     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2881     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2882     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2883     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2884     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2885     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2886 
2887     redund->nzlocal = nzlocal;
2888     redund->nsends  = nsends;
2889     redund->nrecvs  = nrecvs;
2890     redund->send_rank = send_rank;
2891     redund->recv_rank = recv_rank;
2892     redund->sbuf_nz = sbuf_nz;
2893     redund->rbuf_nz = rbuf_nz;
2894     redund->sbuf_j  = sbuf_j;
2895     redund->sbuf_a  = sbuf_a;
2896     redund->rbuf_j  = rbuf_j;
2897     redund->rbuf_a  = rbuf_a;
2898 
2899     redund->Destroy = C->ops->destroy;
2900     C->ops->destroy = MatDestroy_MatRedundant;
2901   }
2902   PetscFunctionReturn(0);
2903 }
2904 
2905 #undef __FUNCT__
2906 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2907 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2908 {
2909   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2910   PetscErrorCode ierr;
2911   PetscInt       i,*idxb = 0;
2912   PetscScalar    *va,*vb;
2913   Vec            vtmp;
2914 
2915   PetscFunctionBegin;
2916   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2917   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2918   if (idx) {
2919     for (i=0; i<A->rmap->n; i++) {
2920       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2921     }
2922   }
2923 
2924   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2925   if (idx) {
2926     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2927   }
2928   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2929   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2930 
2931   for (i=0; i<A->rmap->n; i++) {
2932     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2933       va[i] = vb[i];
2934       if (idx) idx[i] = a->garray[idxb[i]];
2935     }
2936   }
2937 
2938   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2939   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2940   ierr = PetscFree(idxb);CHKERRQ(ierr);
2941   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2942   PetscFunctionReturn(0);
2943 }
2944 
2945 #undef __FUNCT__
2946 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2947 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2948 {
2949   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2950   PetscErrorCode ierr;
2951   PetscInt       i,*idxb = 0;
2952   PetscScalar    *va,*vb;
2953   Vec            vtmp;
2954 
2955   PetscFunctionBegin;
2956   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2957   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2958   if (idx) {
2959     for (i=0; i<A->rmap->n; i++) {
2960       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2961     }
2962   }
2963 
2964   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2965   if (idx) {
2966     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2967   }
2968   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2969   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2970 
2971   for (i=0; i<A->rmap->n; i++) {
2972     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2973       va[i] = vb[i];
2974       if (idx) idx[i] = a->garray[idxb[i]];
2975     }
2976   }
2977 
2978   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2979   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2980   ierr = PetscFree(idxb);CHKERRQ(ierr);
2981   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2982   PetscFunctionReturn(0);
2983 }
2984 
2985 #undef __FUNCT__
2986 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2987 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2988 {
2989   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2990   PetscInt       n      = A->rmap->n;
2991   PetscInt       cstart = A->cmap->rstart;
2992   PetscInt      *cmap   = mat->garray;
2993   PetscInt      *diagIdx, *offdiagIdx;
2994   Vec            diagV, offdiagV;
2995   PetscScalar   *a, *diagA, *offdiagA;
2996   PetscInt       r;
2997   PetscErrorCode ierr;
2998 
2999   PetscFunctionBegin;
3000   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3001   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3002   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3003   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3004   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3005   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3006   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3007   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3008   for (r = 0; r < n; ++r) {
3009     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3010       a[r]   = diagA[r];
3011       idx[r] = cstart + diagIdx[r];
3012     } else {
3013       a[r]   = offdiagA[r];
3014       idx[r] = cmap[offdiagIdx[r]];
3015     }
3016   }
3017   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3018   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3019   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3020   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3021   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3022   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3023   PetscFunctionReturn(0);
3024 }
3025 
3026 #undef __FUNCT__
3027 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3028 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3029 {
3030   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
3031   PetscInt       n      = A->rmap->n;
3032   PetscInt       cstart = A->cmap->rstart;
3033   PetscInt      *cmap   = mat->garray;
3034   PetscInt      *diagIdx, *offdiagIdx;
3035   Vec            diagV, offdiagV;
3036   PetscScalar   *a, *diagA, *offdiagA;
3037   PetscInt       r;
3038   PetscErrorCode ierr;
3039 
3040   PetscFunctionBegin;
3041   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3042   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3043   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3044   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3045   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3046   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3047   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3048   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3049   for (r = 0; r < n; ++r) {
3050     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3051       a[r]   = diagA[r];
3052       idx[r] = cstart + diagIdx[r];
3053     } else {
3054       a[r]   = offdiagA[r];
3055       idx[r] = cmap[offdiagIdx[r]];
3056     }
3057   }
3058   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3059   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3060   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3061   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3062   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3063   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3064   PetscFunctionReturn(0);
3065 }
3066 
3067 #undef __FUNCT__
3068 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3069 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3070 {
3071   PetscErrorCode ierr;
3072   Mat            *dummy;
3073 
3074   PetscFunctionBegin;
3075   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3076   *newmat = *dummy;
3077   ierr = PetscFree(dummy);CHKERRQ(ierr);
3078   PetscFunctionReturn(0);
3079 }
3080 
3081 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3082 
3083 #undef __FUNCT__
3084 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3085 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3086 {
3087   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
3088   PetscErrorCode ierr;
3089 
3090   PetscFunctionBegin;
3091   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3092   PetscFunctionReturn(0);
3093 }
3094 
3095 #undef __FUNCT__
3096 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3097 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3098 {
3099   PetscErrorCode ierr;
3100   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3101 
3102   PetscFunctionBegin;
3103   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3104   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3105   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3106   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3107   PetscFunctionReturn(0);
3108 }
3109 
3110 /* -------------------------------------------------------------------*/
3111 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3112        MatGetRow_MPIAIJ,
3113        MatRestoreRow_MPIAIJ,
3114        MatMult_MPIAIJ,
3115 /* 4*/ MatMultAdd_MPIAIJ,
3116        MatMultTranspose_MPIAIJ,
3117        MatMultTransposeAdd_MPIAIJ,
3118 #ifdef PETSC_HAVE_PBGL
3119        MatSolve_MPIAIJ,
3120 #else
3121        0,
3122 #endif
3123        0,
3124        0,
3125 /*10*/ 0,
3126        0,
3127        0,
3128        MatSOR_MPIAIJ,
3129        MatTranspose_MPIAIJ,
3130 /*15*/ MatGetInfo_MPIAIJ,
3131        MatEqual_MPIAIJ,
3132        MatGetDiagonal_MPIAIJ,
3133        MatDiagonalScale_MPIAIJ,
3134        MatNorm_MPIAIJ,
3135 /*20*/ MatAssemblyBegin_MPIAIJ,
3136        MatAssemblyEnd_MPIAIJ,
3137        MatSetOption_MPIAIJ,
3138        MatZeroEntries_MPIAIJ,
3139 /*24*/ MatZeroRows_MPIAIJ,
3140        0,
3141 #ifdef PETSC_HAVE_PBGL
3142        0,
3143 #else
3144        0,
3145 #endif
3146        0,
3147        0,
3148 /*29*/ MatSetUp_MPIAIJ,
3149 #ifdef PETSC_HAVE_PBGL
3150        0,
3151 #else
3152        0,
3153 #endif
3154        0,
3155        0,
3156        0,
3157 /*34*/ MatDuplicate_MPIAIJ,
3158        0,
3159        0,
3160        0,
3161        0,
3162 /*39*/ MatAXPY_MPIAIJ,
3163        MatGetSubMatrices_MPIAIJ,
3164        MatIncreaseOverlap_MPIAIJ,
3165        MatGetValues_MPIAIJ,
3166        MatCopy_MPIAIJ,
3167 /*44*/ MatGetRowMax_MPIAIJ,
3168        MatScale_MPIAIJ,
3169        0,
3170        0,
3171        MatZeroRowsColumns_MPIAIJ,
3172 /*49*/ MatSetRandom_MPIAIJ,
3173        0,
3174        0,
3175        0,
3176        0,
3177 /*54*/ MatFDColoringCreate_MPIAIJ,
3178        0,
3179        MatSetUnfactored_MPIAIJ,
3180        MatPermute_MPIAIJ,
3181        0,
3182 /*59*/ MatGetSubMatrix_MPIAIJ,
3183        MatDestroy_MPIAIJ,
3184        MatView_MPIAIJ,
3185        0,
3186        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3187 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3188        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3189        0,
3190        0,
3191        0,
3192 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3193        MatGetRowMinAbs_MPIAIJ,
3194        0,
3195        MatSetColoring_MPIAIJ,
3196        0,
3197        MatSetValuesAdifor_MPIAIJ,
3198 /*75*/ MatFDColoringApply_AIJ,
3199        0,
3200        0,
3201        0,
3202        MatFindZeroDiagonals_MPIAIJ,
3203 /*80*/ 0,
3204        0,
3205        0,
3206 /*83*/ MatLoad_MPIAIJ,
3207        0,
3208        0,
3209        0,
3210        0,
3211        0,
3212 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3213        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3214        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3215        MatPtAP_MPIAIJ_MPIAIJ,
3216        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3217 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3218        0,
3219        0,
3220        0,
3221        0,
3222 /*99*/ 0,
3223        0,
3224        0,
3225        MatConjugate_MPIAIJ,
3226        0,
3227 /*104*/MatSetValuesRow_MPIAIJ,
3228        MatRealPart_MPIAIJ,
3229        MatImaginaryPart_MPIAIJ,
3230        0,
3231        0,
3232 /*109*/0,
3233        MatGetRedundantMatrix_MPIAIJ,
3234        MatGetRowMin_MPIAIJ,
3235        0,
3236        0,
3237 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3238        0,
3239        0,
3240        0,
3241        0,
3242 /*119*/0,
3243        0,
3244        0,
3245        0,
3246        MatGetMultiProcBlock_MPIAIJ,
3247 /*124*/MatFindNonzeroRows_MPIAIJ,
3248        MatGetColumnNorms_MPIAIJ,
3249        MatInvertBlockDiagonal_MPIAIJ,
3250        0,
3251        MatGetSubMatricesParallel_MPIAIJ,
3252 /*129*/0,
3253        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3254        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3255        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3256        0,
3257 /*134*/0,
3258        0,
3259        0,
3260        0,
3261        0
3262 };
3263 
3264 /* ----------------------------------------------------------------------------------------*/
3265 
3266 EXTERN_C_BEGIN
3267 #undef __FUNCT__
3268 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3269 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3270 {
3271   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3272   PetscErrorCode ierr;
3273 
3274   PetscFunctionBegin;
3275   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3276   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3277   PetscFunctionReturn(0);
3278 }
3279 EXTERN_C_END
3280 
3281 EXTERN_C_BEGIN
3282 #undef __FUNCT__
3283 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3284 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3285 {
3286   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3287   PetscErrorCode ierr;
3288 
3289   PetscFunctionBegin;
3290   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3291   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3292   PetscFunctionReturn(0);
3293 }
3294 EXTERN_C_END
3295 
3296 EXTERN_C_BEGIN
3297 #undef __FUNCT__
3298 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3299 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3300 {
3301   Mat_MPIAIJ     *b;
3302   PetscErrorCode ierr;
3303   PetscInt       i;
3304   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3305 
3306   PetscFunctionBegin;
3307   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3308   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3309   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3310   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3311   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3312   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3313 
3314   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3315   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3316   if (d_nnz) {
3317     for (i=0; i<B->rmap->n; i++) {
3318       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3319     }
3320   }
3321   if (o_nnz) {
3322     for (i=0; i<B->rmap->n; i++) {
3323       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3324     }
3325   }
3326   b = (Mat_MPIAIJ*)B->data;
3327 
3328   if (!B->preallocated) {
3329     /* Explicitly create 2 MATSEQAIJ matrices. */
3330     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3331     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3332     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3333     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3334     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3335     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3336     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3337     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3338     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3339     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3340   }
3341 
3342   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3343   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3344   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3345   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3346   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3347   B->preallocated = PETSC_TRUE;
3348   PetscFunctionReturn(0);
3349 }
3350 EXTERN_C_END
3351 
3352 #undef __FUNCT__
3353 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3354 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3355 {
3356   Mat            mat;
3357   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3358   PetscErrorCode ierr;
3359 
3360   PetscFunctionBegin;
3361   *newmat       = 0;
3362   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3363   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3364   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3365   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3366   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3367   a    = (Mat_MPIAIJ*)mat->data;
3368 
3369   mat->factortype    = matin->factortype;
3370   mat->rmap->bs      = matin->rmap->bs;
3371   mat->cmap->bs      = matin->cmap->bs;
3372   mat->assembled    = PETSC_TRUE;
3373   mat->insertmode   = NOT_SET_VALUES;
3374   mat->preallocated = PETSC_TRUE;
3375 
3376   a->size           = oldmat->size;
3377   a->rank           = oldmat->rank;
3378   a->donotstash     = oldmat->donotstash;
3379   a->roworiented    = oldmat->roworiented;
3380   a->rowindices     = 0;
3381   a->rowvalues      = 0;
3382   a->getrowactive   = PETSC_FALSE;
3383 
3384   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3385   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3386 
3387   if (oldmat->colmap) {
3388 #if defined (PETSC_USE_CTABLE)
3389     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3390 #else
3391     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3392     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3393     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3394 #endif
3395   } else a->colmap = 0;
3396   if (oldmat->garray) {
3397     PetscInt len;
3398     len  = oldmat->B->cmap->n;
3399     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3400     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3401     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3402   } else a->garray = 0;
3403 
3404   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3405   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3406   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3407   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3408   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3409   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3410   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3411   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3412   ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3413   *newmat = mat;
3414   PetscFunctionReturn(0);
3415 }
3416 
3417 
3418 
3419 #undef __FUNCT__
3420 #define __FUNCT__ "MatLoad_MPIAIJ"
3421 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3422 {
3423   PetscScalar    *vals,*svals;
3424   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3425   PetscErrorCode ierr;
3426   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3427   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3428   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3429   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3430   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3431   int            fd;
3432   PetscInt       bs = 1;
3433 
3434   PetscFunctionBegin;
3435   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3436   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3437   if (!rank) {
3438     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3439     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3440     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3441   }
3442 
3443   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3444   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3445   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3446 
3447   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3448 
3449   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3450   M = header[1]; N = header[2];
3451   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3452   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3453   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3454 
3455   /* If global sizes are set, check if they are consistent with that given in the file */
3456   if (sizesset) {
3457     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3458   }
3459   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3460   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3461 
3462   /* determine ownership of all (block) rows */
3463   if ( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3464   if (newMat->rmap->n < 0 ) m    = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3465   else m = newMat->rmap->n; /* Set by user */
3466 
3467   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3468   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3469 
3470   /* First process needs enough room for process with most rows */
3471   if (!rank) {
3472     mmax = rowners[1];
3473     for (i=2; i<=size; i++) {
3474       mmax = PetscMax(mmax, rowners[i]);
3475     }
3476   } else mmax = m;
3477 
3478   rowners[0] = 0;
3479   for (i=2; i<=size; i++) {
3480     rowners[i] += rowners[i-1];
3481   }
3482   rstart = rowners[rank];
3483   rend   = rowners[rank+1];
3484 
3485   /* distribute row lengths to all processors */
3486   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3487   if (!rank) {
3488     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3489     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3490     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3491     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3492     for (j=0; j<m; j++) {
3493       procsnz[0] += ourlens[j];
3494     }
3495     for (i=1; i<size; i++) {
3496       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3497       /* calculate the number of nonzeros on each processor */
3498       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3499         procsnz[i] += rowlengths[j];
3500       }
3501       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3502     }
3503     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3504   } else {
3505     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3506   }
3507 
3508   if (!rank) {
3509     /* determine max buffer needed and allocate it */
3510     maxnz = 0;
3511     for (i=0; i<size; i++) {
3512       maxnz = PetscMax(maxnz,procsnz[i]);
3513     }
3514     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3515 
3516     /* read in my part of the matrix column indices  */
3517     nz   = procsnz[0];
3518     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3519     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3520 
3521     /* read in everyone else's rows and ship them off */
3522     for (i=1; i<size; i++) {
3523       nz     = procsnz[i];
3524       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3525       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3526     }
3527     ierr = PetscFree(cols);CHKERRQ(ierr);
3528   } else {
3529     /* determine buffer space needed for message */
3530     nz = 0;
3531     for (i=0; i<m; i++) {
3532       nz += ourlens[i];
3533     }
3534     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3535 
3536     /* receive message of column indices*/
3537     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3538   }
3539 
3540   /* determine column ownership if matrix is not square */
3541   if (N != M) {
3542     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3543     else n = newMat->cmap->n;
3544     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3545     cstart = cend - n;
3546   } else {
3547     cstart = rstart;
3548     cend   = rend;
3549     n      = cend - cstart;
3550   }
3551 
3552   /* loop over local rows, determining number of off diagonal entries */
3553   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3554   jj = 0;
3555   for (i=0; i<m; i++) {
3556     for (j=0; j<ourlens[i]; j++) {
3557       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3558       jj++;
3559     }
3560   }
3561 
3562   for (i=0; i<m; i++) {
3563     ourlens[i] -= offlens[i];
3564   }
3565   if (!sizesset) {
3566     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3567   }
3568 
3569   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3570 
3571   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3572 
3573   for (i=0; i<m; i++) {
3574     ourlens[i] += offlens[i];
3575   }
3576 
3577   if (!rank) {
3578     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3579 
3580     /* read in my part of the matrix numerical values  */
3581     nz   = procsnz[0];
3582     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3583 
3584     /* insert into matrix */
3585     jj      = rstart;
3586     smycols = mycols;
3587     svals   = vals;
3588     for (i=0; i<m; i++) {
3589       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3590       smycols += ourlens[i];
3591       svals   += ourlens[i];
3592       jj++;
3593     }
3594 
3595     /* read in other processors and ship out */
3596     for (i=1; i<size; i++) {
3597       nz     = procsnz[i];
3598       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3599       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3600     }
3601     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3602   } else {
3603     /* receive numeric values */
3604     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3605 
3606     /* receive message of values*/
3607     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3608 
3609     /* insert into matrix */
3610     jj      = rstart;
3611     smycols = mycols;
3612     svals   = vals;
3613     for (i=0; i<m; i++) {
3614       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3615       smycols += ourlens[i];
3616       svals   += ourlens[i];
3617       jj++;
3618     }
3619   }
3620   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3621   ierr = PetscFree(vals);CHKERRQ(ierr);
3622   ierr = PetscFree(mycols);CHKERRQ(ierr);
3623   ierr = PetscFree(rowners);CHKERRQ(ierr);
3624   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3625   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3626 
3627   PetscFunctionReturn(0);
3628 }
3629 
3630 #undef __FUNCT__
3631 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3632 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3633 {
3634   PetscErrorCode ierr;
3635   IS             iscol_local;
3636   PetscInt       csize;
3637 
3638   PetscFunctionBegin;
3639   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3640   if (call == MAT_REUSE_MATRIX) {
3641     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3642     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3643   } else {
3644     PetscInt cbs;
3645     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3646     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3647     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3648   }
3649   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3650   if (call == MAT_INITIAL_MATRIX) {
3651     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3652     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3653   }
3654   PetscFunctionReturn(0);
3655 }
3656 
3657 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3658 #undef __FUNCT__
3659 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3660 /*
3661     Not great since it makes two copies of the submatrix: first a SeqAIJ
3662   matrix of the local rows, and then the end result obtained by concatenating the local matrices.
3663   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3664 
3665   Note: This requires a sequential iscol with all indices.
3666 */
3667 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3668 {
3669   PetscErrorCode ierr;
3670   PetscMPIInt    rank,size;
3671   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3672   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3673   PetscBool      allcolumns, colflag;
3674   Mat            M,Mreuse;
3675   MatScalar      *vwork,*aa;
3676   MPI_Comm       comm = ((PetscObject)mat)->comm;
3677   Mat_SeqAIJ     *aij;
3678 
3679 
3680   PetscFunctionBegin;
3681   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3682   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3683 
3684   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3685   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3686   if (colflag && ncol == mat->cmap->N) {
3687     allcolumns = PETSC_TRUE;
3688   } else {
3689     allcolumns = PETSC_FALSE;
3690   }
3691   if (call ==  MAT_REUSE_MATRIX) {
3692     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3693     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3694     ierr  = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3695   } else {
3696     ierr   = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3697   }
3698 
3699   /*
3700       m - number of local rows
3701       n - number of columns (same on all processors)
3702       rstart - first row in new global matrix generated
3703   */
3704   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3705   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3706   if (call == MAT_INITIAL_MATRIX) {
3707     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3708     ii  = aij->i;
3709     jj  = aij->j;
3710 
3711     /*
3712         Determine the number of non-zeros in the diagonal and off-diagonal
3713         portions of the matrix in order to do correct preallocation
3714     */
3715 
3716     /* first get start and end of "diagonal" columns */
3717     if (csize == PETSC_DECIDE) {
3718       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3719       if (mglobal == n) { /* square matrix */
3720         nlocal = m;
3721       } else {
3722         nlocal = n/size + ((n % size) > rank);
3723       }
3724     } else {
3725       nlocal = csize;
3726     }
3727     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3728     rstart = rend - nlocal;
3729     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3730 
3731     /* next, compute all the lengths */
3732     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3733     olens = dlens + m;
3734     for (i=0; i<m; i++) {
3735       jend = ii[i+1] - ii[i];
3736       olen = 0;
3737       dlen = 0;
3738       for (j=0; j<jend; j++) {
3739         if (*jj < rstart || *jj >= rend) olen++;
3740         else dlen++;
3741         jj++;
3742       }
3743       olens[i] = olen;
3744       dlens[i] = dlen;
3745     }
3746     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3747     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3748     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3749     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3750     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3751     ierr = PetscFree(dlens);CHKERRQ(ierr);
3752   } else {
3753     PetscInt ml,nl;
3754 
3755     M = *newmat;
3756     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3757     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3758     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3759     /*
3760          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3761        rather than the slower MatSetValues().
3762     */
3763     M->was_assembled = PETSC_TRUE;
3764     M->assembled     = PETSC_FALSE;
3765   }
3766   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3767   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3768   ii  = aij->i;
3769   jj  = aij->j;
3770   aa  = aij->a;
3771   for (i=0; i<m; i++) {
3772     row   = rstart + i;
3773     nz    = ii[i+1] - ii[i];
3774     cwork = jj;     jj += nz;
3775     vwork = aa;     aa += nz;
3776     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3777   }
3778 
3779   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3780   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3781   *newmat = M;
3782 
3783   /* save submatrix used in processor for next request */
3784   if (call ==  MAT_INITIAL_MATRIX) {
3785     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3786     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3787   }
3788 
3789   PetscFunctionReturn(0);
3790 }
3791 
3792 EXTERN_C_BEGIN
3793 #undef __FUNCT__
3794 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3795 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3796 {
3797   PetscInt       m,cstart, cend,j,nnz,i,d;
3798   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3799   const PetscInt *JJ;
3800   PetscScalar    *values;
3801   PetscErrorCode ierr;
3802 
3803   PetscFunctionBegin;
3804   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3805 
3806   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3807   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3808   m      = B->rmap->n;
3809   cstart = B->cmap->rstart;
3810   cend   = B->cmap->rend;
3811   rstart = B->rmap->rstart;
3812 
3813   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3814 
3815 #if defined(PETSC_USE_DEBUG)
3816   for (i=0; i<m; i++) {
3817     nnz     = Ii[i+1]- Ii[i];
3818     JJ      = J + Ii[i];
3819     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3820     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3821     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3822   }
3823 #endif
3824 
3825   for (i=0; i<m; i++) {
3826     nnz     = Ii[i+1]- Ii[i];
3827     JJ      = J + Ii[i];
3828     nnz_max = PetscMax(nnz_max,nnz);
3829     d       = 0;
3830     for (j=0; j<nnz; j++) {
3831       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3832     }
3833     d_nnz[i] = d;
3834     o_nnz[i] = nnz - d;
3835   }
3836   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3837   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3838 
3839   if (v) values = (PetscScalar*)v;
3840   else {
3841     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3842     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3843   }
3844 
3845   for (i=0; i<m; i++) {
3846     ii   = i + rstart;
3847     nnz  = Ii[i+1]- Ii[i];
3848     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3849   }
3850   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3851   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3852 
3853   if (!v) {
3854     ierr = PetscFree(values);CHKERRQ(ierr);
3855   }
3856   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3857   PetscFunctionReturn(0);
3858 }
3859 EXTERN_C_END
3860 
3861 #undef __FUNCT__
3862 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3863 /*@
3864    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3865    (the default parallel PETSc format).
3866 
3867    Collective on MPI_Comm
3868 
3869    Input Parameters:
3870 +  B - the matrix
3871 .  i - the indices into j for the start of each local row (starts with zero)
3872 .  j - the column indices for each local row (starts with zero)
3873 -  v - optional values in the matrix
3874 
3875    Level: developer
3876 
3877    Notes:
3878        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3879      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3880      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3881 
3882        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3883 
3884        The format used for the sparse matrix input is equivalent to a
3885     row-major ordering, i.e. for the following matrix, the input data expected is
3886     as shown:
3887 
3888         1 0 0
3889         2 0 3     P0
3890        -------
3891         4 5 6     P1
3892 
3893      Process0 [P0]: rows_owned=[0,1]
3894         i =  {0,1,3}  [size = nrow+1  = 2+1]
3895         j =  {0,0,2}  [size = nz = 3]
3896         v =  {1,2,3}  [size = nz = 3]
3897 
3898      Process1 [P1]: rows_owned=[2]
3899         i =  {0,3}    [size = nrow+1  = 1+1]
3900         j =  {0,1,2}  [size = nz = 3]
3901         v =  {4,5,6}  [size = nz = 3]
3902 
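     A minimal calling sketch for Process0 [P0] of the example above (a rough sketch only;
     B and ierr are assumed to be declared and the error handling is abbreviated):
.vb
     PetscInt    i[] = {0,1,3};          /* row pointers for the 2 local rows */
     PetscInt    j[] = {0,0,2};          /* global column indices             */
     PetscScalar v[] = {1.0,2.0,3.0};    /* values matching j                 */

     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);   /* 2 local rows on P0, 3x3 global */
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
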
3903 .keywords: matrix, aij, compressed row, sparse, parallel
3904 
3905 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3906           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3907 @*/
3908 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3909 {
3910   PetscErrorCode ierr;
3911 
3912   PetscFunctionBegin;
3913   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3914   PetscFunctionReturn(0);
3915 }
3916 
3917 #undef __FUNCT__
3918 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3919 /*@C
3920    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3921    (the default parallel PETSc format).  For good matrix assembly performance
3922    the user should preallocate the matrix storage by setting the parameters
3923    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3924    performance can be increased by more than a factor of 50.
3925 
3926    Collective on MPI_Comm
3927 
3928    Input Parameters:
3929 +  A - the matrix
3930 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3931            (same value is used for all local rows)
3932 .  d_nnz - array containing the number of nonzeros in the various rows of the
3933            DIAGONAL portion of the local submatrix (possibly different for each row)
3934            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3935            The size of this array is equal to the number of local rows, i.e 'm'.
3936            For matrices that will be factored, you must leave room for (and set)
3937            the diagonal entry even if it is zero.
3938 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3939            submatrix (same value is used for all local rows).
3940 -  o_nnz - array containing the number of nonzeros in the various rows of the
3941            OFF-DIAGONAL portion of the local submatrix (possibly different for
3942            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3943            structure. The size of this array is equal to the number
3944            of local rows, i.e 'm'.
3945 
3946    If the *_nnz parameter is given then the *_nz parameter is ignored
3947 
3948    The AIJ format (also called the Yale sparse matrix format or
3949    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3950    storage.  The stored row and column indices begin with zero.
3951    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3952 
3953    The parallel matrix is partitioned such that the first m0 rows belong to
3954    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3955    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
3956 
3957    The DIAGONAL portion of the local submatrix of a processor can be defined
3958    as the submatrix obtained by extracting the part corresponding to
3959    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3960    first row that belongs to the processor, r2 is the last row belonging to
3961    this processor, and c1-c2 is the range of indices of the local part of a
3962    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3963    common case of a square matrix, the row and column ranges are the same and
3964    the DIAGONAL part is also square. The remaining portion of the local
3965    submatrix, of size m x (N-n), constitutes the OFF-DIAGONAL portion.
3966 
3967    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3968 
3969    You can call MatGetInfo() to get information on how effective the preallocation was;
3970    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3971    You can also run with the option -info and look for messages with the string
3972    malloc in them to see if additional memory allocation was needed.
3973 
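   For example, a minimal sketch of querying the local statistics after assembly
   (error checking omitted; the fields are those listed above):

.vb
   MatInfo info;
   MatGetInfo(A,MAT_LOCAL,&info);
   PetscPrintf(PETSC_COMM_SELF,"mallocs %g nz_allocated %g nz_used %g nz_unneeded %g\n",
               info.mallocs,info.nz_allocated,info.nz_used,info.nz_unneeded);
.ve
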
3974    Example usage:
3975 
3976    Consider the following 8x8 matrix with 34 non-zero values, that is
3977    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3978    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3979    as follows:
3980 
3981 .vb
3982             1  2  0  |  0  3  0  |  0  4
3983     Proc0   0  5  6  |  7  0  0  |  8  0
3984             9  0 10  | 11  0  0  | 12  0
3985     -------------------------------------
3986            13  0 14  | 15 16 17  |  0  0
3987     Proc1   0 18  0  | 19 20 21  |  0  0
3988             0  0  0  | 22 23  0  | 24  0
3989     -------------------------------------
3990     Proc2  25 26 27  |  0  0 28  | 29  0
3991            30  0  0  | 31 32 33  |  0 34
3992 .ve
3993 
3994    This can be represented as a collection of submatrices as:
3995 
3996 .vb
3997       A B C
3998       D E F
3999       G H I
4000 .ve
4001 
4002    Where the submatrices A,B,C are owned by proc0, D,E,F are
4003    owned by proc1, G,H,I are owned by proc2.
4004 
4005    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4006    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4007    The 'M','N' parameters are 8,8, and have the same values on all procs.
4008 
4009    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4010    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4011    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4012    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4013    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4014    matrix, and [DF] as another SeqAIJ matrix.
4015 
4016    When d_nz, o_nz parameters are specified, d_nz storage elements are
4017    allocated for every row of the local diagonal submatrix, and o_nz
4018    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4019    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4020    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4021    In this case, the values of d_nz,o_nz are:
4022 .vb
4023      proc0 : d_nz = 2, o_nz = 2
4024      proc1 : d_nz = 3, o_nz = 2
4025      proc2 : d_nz = 1, o_nz = 4
4026 .ve
4027    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4028    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4029    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4030    34 values.
4031 
4032    When d_nnz, o_nnz parameters are specified, the storage is specified
4033    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4034    In the above case the values for d_nnz,o_nnz are:
4035 .vb
4036      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4037      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4038      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4039 .ve
4040    Here the space allocated is the sum of all the above values, i.e. 34, and
4041    hence the preallocation is exact.
4042 
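   A minimal sketch of the call each process would then make for this example (shown for
   proc0; the other processes pass their own arrays, and the *_nz arguments are ignored
   because the *_nnz arrays are given):

.vb
   PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};   /* proc0 values from above */
   MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
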
4043    Level: intermediate
4044 
4045 .keywords: matrix, aij, compressed row, sparse, parallel
4046 
4047 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4048           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4049 @*/
4050 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4051 {
4052   PetscErrorCode ierr;
4053 
4054   PetscFunctionBegin;
4055   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4056   PetscValidType(B,1);
4057   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4058   PetscFunctionReturn(0);
4059 }
4060 
4061 #undef __FUNCT__
4062 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4063 /*@
4064      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
4065          in standard CSR format.
4066 
4067    Collective on MPI_Comm
4068 
4069    Input Parameters:
4070 +  comm - MPI communicator
4071 .  m - number of local rows (Cannot be PETSC_DECIDE)
4072 .  n - This value should be the same as the local size used in creating the
4073        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4074        calculated if N is given) For square matrices n is almost always m.
4075 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4076 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4077 .   i - row indices
4078 .   j - column indices
4079 -   a - matrix values
4080 
4081    Output Parameter:
4082 .   mat - the matrix
4083 
4084    Level: intermediate
4085 
4086    Notes:
4087        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4088      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4089      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4090 
4091        The i and j indices are 0 based; the i indices are offsets into the local j (and a) arrays.
4092 
4093        The format which is used for the sparse matrix input is equivalent to a
4094     row-major ordering, i.e. for the following matrix, the input data expected is
4095     as shown:
4096 
4097         1 0 0
4098         2 0 3     P0
4099        -------
4100         4 5 6     P1
4101 
4102      Process0 [P0]: rows_owned=[0,1]
4103         i =  {0,1,3}  [size = nrow+1  = 2+1]
4104         j =  {0,0,2}  [size = nz = 3]
4105         v =  {1,2,3}  [size = nz = 3]
4106 
4107      Process1 [P1]: rows_owned=[2]
4108         i =  {0,3}    [size = nrow+1  = 1+1]
4109         j =  {0,1,2}  [size = nz = 3]
4110         v =  {4,5,6}  [size = nz = 3]
4111 
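   A minimal sketch of the corresponding call on process 0 for this example (process 1
   passes m=1 and its own arrays; error checking omitted):

.vb
   PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
   PetscScalar v[] = {1,2,3};
   Mat         A;
   MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve
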
4112 .keywords: matrix, aij, compressed row, sparse, parallel
4113 
4114 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4115           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4116 @*/
4117 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4118 {
4119   PetscErrorCode ierr;
4120 
4121   PetscFunctionBegin;
4122   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4123   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4124   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4125   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4126   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4127   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4128   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4129   PetscFunctionReturn(0);
4130 }
4131 
4132 #undef __FUNCT__
4133 #define __FUNCT__ "MatCreateAIJ"
4134 /*@C
4135    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4136    (the default parallel PETSc format).  For good matrix assembly performance
4137    the user should preallocate the matrix storage by setting the parameters
4138    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4139    performance can be increased by more than a factor of 50.
4140 
4141    Collective on MPI_Comm
4142 
4143    Input Parameters:
4144 +  comm - MPI communicator
4145 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4146            This value should be the same as the local size used in creating the
4147            y vector for the matrix-vector product y = Ax.
4148 .  n - This value should be the same as the local size used in creating the
4149        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4150        calculated if N is given) For square matrices n is almost always m.
4151 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4152 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4153 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4154            (same value is used for all local rows)
4155 .  d_nnz - array containing the number of nonzeros in the various rows of the
4156            DIAGONAL portion of the local submatrix (possibly different for each row)
4157            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4158            The size of this array is equal to the number of local rows, i.e 'm'.
4159 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4160            submatrix (same value is used for all local rows).
4161 -  o_nnz - array containing the number of nonzeros in the various rows of the
4162            OFF-DIAGONAL portion of the local submatrix (possibly different for
4163            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4164            structure. The size of this array is equal to the number
4165            of local rows, i.e 'm'.
4166 
4167    Output Parameter:
4168 .  A - the matrix
4169 
4170    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4171    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4172    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4173 
4174    Notes:
4175    If the *_nnz parameter is given then the *_nz parameter is ignored
4176 
4177    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4178    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4179    storage requirements for this matrix.
4180 
4181    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4182    processor then it must be used on all processors that share the object for
4183    that argument.
4184 
4185    The user MUST specify either the local or global matrix dimensions
4186    (possibly both).
4187 
4188    The parallel matrix is partitioned across processors such that the
4189    first m0 rows belong to process 0, the next m1 rows belong to
4190    process 1, the next m2 rows belong to process 2, etc., where
4191    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4192    values corresponding to an [m x N] submatrix.
4193 
4194    The columns are logically partitioned with the n0 columns belonging
4195    to the 0th partition, the next n1 columns belonging to the next
4196    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4197 
4198    The DIAGONAL portion of the local submatrix on any given processor
4199    is the submatrix corresponding to the rows and columns m,n
4200    corresponding to the given processor, i.e. the diagonal matrix on
4201    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4202    etc. The remaining portion of the local submatrix [m x (N-n)]
4203    constitutes the OFF-DIAGONAL portion. The example below better
4204    illustrates this concept.
4205 
4206    For a square global matrix we define each processor's diagonal portion
4207    to be its local rows and the corresponding columns (a square submatrix);
4208    each processor's off-diagonal portion encompasses the remainder of the
4209    local matrix (a rectangular submatrix).
4210 
4211    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4212 
4213    When calling this routine with a single process communicator, a matrix of
4214    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4215    type of communicator, use the construction mechanism:
4216      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4217 
4218    By default, this format uses inodes (identical nodes) when possible.
4219    We search for consecutive rows with the same nonzero structure, thereby
4220    reusing matrix information to achieve increased efficiency.
4221 
4222    Options Database Keys:
4223 +  -mat_no_inode  - Do not use inodes
4224 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4225 -  -mat_aij_oneindex - Internally use indexing starting at 1
4226         rather than 0.  Note that when calling MatSetValues(),
4227         the user still MUST index entries starting at 0!
4228 
4229 
4230    Example usage:
4231 
4232    Consider the following 8x8 matrix with 34 non-zero values, that is
4233    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4234    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4235    as follows:
4236 
4237 .vb
4238             1  2  0  |  0  3  0  |  0  4
4239     Proc0   0  5  6  |  7  0  0  |  8  0
4240             9  0 10  | 11  0  0  | 12  0
4241     -------------------------------------
4242            13  0 14  | 15 16 17  |  0  0
4243     Proc1   0 18  0  | 19 20 21  |  0  0
4244             0  0  0  | 22 23  0  | 24  0
4245     -------------------------------------
4246     Proc2  25 26 27  |  0  0 28  | 29  0
4247            30  0  0  | 31 32 33  |  0 34
4248 .ve
4249 
4250    This can be represented as a collection of submatrices as:
4251 
4252 .vb
4253       A B C
4254       D E F
4255       G H I
4256 .ve
4257 
4258    Where the submatrices A,B,C are owned by proc0, D,E,F are
4259    owned by proc1, G,H,I are owned by proc2.
4260 
4261    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4262    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4263    The 'M','N' parameters are 8,8, and have the same values on all procs.
4264 
4265    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4266    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4267    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4268    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4269    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4270    matrix, and [DF] as another SeqAIJ matrix.
4271 
4272    When d_nz, o_nz parameters are specified, d_nz storage elements are
4273    allocated for every row of the local diagonal submatrix, and o_nz
4274    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4275    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4276    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4277    In this case, the values of d_nz,o_nz are:
4278 .vb
4279      proc0 : d_nz = 2, o_nz = 2
4280      proc1 : d_nz = 3, o_nz = 2
4281      proc2 : d_nz = 1, o_nz = 4
4282 .ve
4283    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4284    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4285    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4286    34 values.
4287 
4288    When d_nnz, o_nnz parameters are specified, the storage is specified
4289    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4290    In the above case the values for d_nnz,o_nnz are:
4291 .vb
4292      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4293      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4294      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4295 .ve
4296    Here the space allocated is the sum of all the above values, i.e. 34, and
4297    hence the preallocation is exact.
4298 
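   A minimal sketch of the call proc0 would make to create the 8x8 example above with
   per-row counts (the other processes pass their own local sizes and arrays; error
   checking omitted):

.vb
   PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};   /* proc0 values from above */
   Mat      A;
   MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
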
4299    Level: intermediate
4300 
4301 .keywords: matrix, aij, compressed row, sparse, parallel
4302 
4303 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4304           MPIAIJ, MatCreateMPIAIJWithArrays()
4305 @*/
4306 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4307 {
4308   PetscErrorCode ierr;
4309   PetscMPIInt    size;
4310 
4311   PetscFunctionBegin;
4312   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4313   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4314   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4315   if (size > 1) {
4316     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4317     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4318   } else {
4319     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4320     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4321   }
4322   PetscFunctionReturn(0);
4323 }
4324 
4325 #undef __FUNCT__
4326 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4327 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4328 {
4329   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4330 
4331   PetscFunctionBegin;
4332   *Ad     = a->A;
4333   *Ao     = a->B;
4334   *colmap = a->garray;
4335   PetscFunctionReturn(0);
4336 }
4337 
4338 #undef __FUNCT__
4339 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4340 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4341 {
4342   PetscErrorCode ierr;
4343   PetscInt       i;
4344   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4345 
4346   PetscFunctionBegin;
4347   if (coloring->ctype == IS_COLORING_GLOBAL) {
4348     ISColoringValue *allcolors,*colors;
4349     ISColoring      ocoloring;
4350 
4351     /* set coloring for diagonal portion */
4352     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4353 
4354     /* set coloring for off-diagonal portion */
4355     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4356     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4357     for (i=0; i<a->B->cmap->n; i++) {
4358       colors[i] = allcolors[a->garray[i]];
4359     }
4360     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4361     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4362     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4363     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4364   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4365     ISColoringValue *colors;
4366     PetscInt        *larray;
4367     ISColoring      ocoloring;
4368 
4369     /* set coloring for diagonal portion */
4370     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4371     for (i=0; i<a->A->cmap->n; i++) {
4372       larray[i] = i + A->cmap->rstart;
4373     }
4374     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4375     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4376     for (i=0; i<a->A->cmap->n; i++) {
4377       colors[i] = coloring->colors[larray[i]];
4378     }
4379     ierr = PetscFree(larray);CHKERRQ(ierr);
4380     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4381     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4382     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4383 
4384     /* set coloring for off-diagonal portion */
4385     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4386     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4387     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4388     for (i=0; i<a->B->cmap->n; i++) {
4389       colors[i] = coloring->colors[larray[i]];
4390     }
4391     ierr = PetscFree(larray);CHKERRQ(ierr);
4392     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4393     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4394     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4395   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4396 
4397   PetscFunctionReturn(0);
4398 }
4399 
4400 #undef __FUNCT__
4401 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4402 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4403 {
4404   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4405   PetscErrorCode ierr;
4406 
4407   PetscFunctionBegin;
4408   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4409   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4410   PetscFunctionReturn(0);
4411 }
4412 
4413 #undef __FUNCT__
4414 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4415 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4416 {
4417   PetscErrorCode ierr;
4418   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4419   PetscInt       *indx;
4420 
4421   PetscFunctionBegin;
4422   /* This routine will ONLY return MPIAIJ type matrix */
4423   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4424   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4425   if (n == PETSC_DECIDE) {
4426     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4427   }
4428   /* Check sum(n) = N */
4429   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4430   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4431 
4432   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4433   rstart -= m;
4434 
4435   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4436   for (i=0;i<m;i++) {
4437     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4438     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4439     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4440   }
4441 
4442   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4443   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4444   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4445   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4446   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4447   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4448   PetscFunctionReturn(0);
4449 }
4450 
4451 #undef __FUNCT__
4452 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4453 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4454 {
4455   PetscErrorCode ierr;
4456   PetscInt       m,N,i,rstart,nnz,Ii;
4457   PetscInt       *indx;
4458   PetscScalar    *values;
4459 
4460   PetscFunctionBegin;
4461   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4462   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4463   for (i=0;i<m;i++) {
4464     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4465     Ii    = i + rstart;
4466     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4467     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4468   }
4469   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4470   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4471   PetscFunctionReturn(0);
4472 }
4473 
4474 #undef __FUNCT__
4475 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4476 /*@
4477       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4478                  matrices from each processor
4479 
4480     Collective on MPI_Comm
4481 
4482    Input Parameters:
4483 +    comm - the communicator the parallel matrix will live on
4484 .    inmat - the input sequential matrix (one per process)
4485 .    n - number of local columns (or PETSC_DECIDE)
4486 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4487 
4488    Output Parameter:
4489 .    outmat - the parallel matrix generated
4490 
4491     Level: advanced
4492 
4493    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4494 
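   A minimal usage sketch (inmat is an assembled SeqAIJ matrix on each process; error
   checking omitted):

.vb
   Mat outmat;
   MatCreateMPIAIJConcatenateSeqAIJ(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);
.ve
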
4495 @*/
4496 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4497 {
4498   PetscErrorCode ierr;
4499 
4500   PetscFunctionBegin;
4501   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4502   if (scall == MAT_INITIAL_MATRIX) {
4503     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4504   }
4505   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4506   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4507   PetscFunctionReturn(0);
4508 }
4509 
4510 #undef __FUNCT__
4511 #define __FUNCT__ "MatFileSplit"
4512 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4513 {
4514   PetscErrorCode    ierr;
4515   PetscMPIInt       rank;
4516   PetscInt          m,N,i,rstart,nnz;
4517   size_t            len;
4518   const PetscInt    *indx;
4519   PetscViewer       out;
4520   char              *name;
4521   Mat               B;
4522   const PetscScalar *values;
4523 
4524   PetscFunctionBegin;
4525   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4526   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4527   /* Should this be the type of the diagonal block of A? */
4528   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4529   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4530   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4531   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4532   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4533   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4534   for (i=0;i<m;i++) {
4535     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4536     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4537     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4538   }
4539   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4540   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4541 
4542   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4543   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4544   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4545   sprintf(name,"%s.%d",outfile,rank);
4546   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4547   ierr = PetscFree(name);CHKERRQ(ierr);
4548   ierr = MatView(B,out);CHKERRQ(ierr);
4549   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4550   ierr = MatDestroy(&B);CHKERRQ(ierr);
4551   PetscFunctionReturn(0);
4552 }
4553 
4554 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4555 #undef __FUNCT__
4556 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4557 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4558 {
4559   PetscErrorCode       ierr;
4560   Mat_Merge_SeqsToMPI  *merge;
4561   PetscContainer       container;
4562 
4563   PetscFunctionBegin;
4564   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4565   if (container) {
4566     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4567     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4568     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4569     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4570     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4571     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4572     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4573     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4574     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4575     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4576     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4577     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4578     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4579     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4580     ierr = PetscFree(merge);CHKERRQ(ierr);
4581     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4582   }
4583   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4584   PetscFunctionReturn(0);
4585 }
4586 
4587 #include <../src/mat/utils/freespace.h>
4588 #include <petscbt.h>
4589 
4590 #undef __FUNCT__
4591 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4592 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4593 {
4594   PetscErrorCode       ierr;
4595   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4596   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4597   PetscMPIInt          size,rank,taga,*len_s;
4598   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4599   PetscInt             proc,m;
4600   PetscInt             **buf_ri,**buf_rj;
4601   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4602   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4603   MPI_Request          *s_waits,*r_waits;
4604   MPI_Status           *status;
4605   MatScalar            *aa=a->a;
4606   MatScalar            **abuf_r,*ba_i;
4607   Mat_Merge_SeqsToMPI  *merge;
4608   PetscContainer       container;
4609 
4610   PetscFunctionBegin;
4611   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4612 
4613   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4614   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4615 
4616   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4617   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4618 
4619   bi     = merge->bi;
4620   bj     = merge->bj;
4621   buf_ri = merge->buf_ri;
4622   buf_rj = merge->buf_rj;
4623 
4624   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4625   owners = merge->rowmap->range;
4626   len_s  = merge->len_s;
4627 
4628   /* send and recv matrix values */
4629   /*-----------------------------*/
4630   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4631   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4632 
4633   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4634   for (proc=0,k=0; proc<size; proc++) {
4635     if (!len_s[proc]) continue;
4636     i = owners[proc];
4637     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4638     k++;
4639   }
4640 
4641   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4642   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4643   ierr = PetscFree(status);CHKERRQ(ierr);
4644 
4645   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4646   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4647 
4648   /* insert mat values of mpimat */
4649   /*----------------------------*/
4650   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4651   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4652 
4653   for (k=0; k<merge->nrecv; k++) {
4654     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4655     nrows = *(buf_ri_k[k]);
4656     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of the k-th received i-structure */
4657     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
4658   }
4659 
4660   /* set values of ba */
4661   m = merge->rowmap->n;
4662   for (i=0; i<m; i++) {
4663     arow = owners[rank] + i;
4664     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4665     bnzi = bi[i+1] - bi[i];
4666     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4667 
4668     /* add local non-zero vals of this proc's seqmat into ba */
4669     anzi = ai[arow+1] - ai[arow];
4670     aj   = a->j + ai[arow];
4671     aa   = a->a + ai[arow];
4672     nextaj = 0;
4673     for (j=0; nextaj<anzi; j++) {
4674       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4675         ba_i[j] += aa[nextaj++];
4676       }
4677     }
4678 
4679     /* add received vals into ba */
4680     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4681       /* i-th row */
4682       if (i == *nextrow[k]) {
4683         anzi = *(nextai[k]+1) - *nextai[k];
4684         aj   = buf_rj[k] + *(nextai[k]);
4685         aa   = abuf_r[k] + *(nextai[k]);
4686         nextaj = 0;
4687         for (j=0; nextaj<anzi; j++) {
4688           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4689             ba_i[j] += aa[nextaj++];
4690           }
4691         }
4692         nextrow[k]++; nextai[k]++;
4693       }
4694     }
4695     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4696   }
4697   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4698   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4699 
4700   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4701   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4702   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4703   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4704   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4705   PetscFunctionReturn(0);
4706 }
4707 
4708 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4709 
4710 #undef __FUNCT__
4711 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4712 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4713 {
4714   PetscErrorCode       ierr;
4715   Mat                  B_mpi;
4716   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4717   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4718   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4719   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4720   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4721   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4722   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4723   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4724   MPI_Status           *status;
4725   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4726   PetscBT              lnkbt;
4727   Mat_Merge_SeqsToMPI  *merge;
4728   PetscContainer       container;
4729 
4730   PetscFunctionBegin;
4731   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4732 
4733   /* make sure it is a PETSc comm */
4734   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4735   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4736   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4737 
4738   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4739   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4740 
4741   /* determine row ownership */
4742   /*---------------------------------------------------------*/
4743   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4744   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4745   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4746   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4747   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4748   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4749   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4750 
4751   m      = merge->rowmap->n;
4752   owners = merge->rowmap->range;
4753 
4754   /* determine the number of messages to send, their lengths */
4755   /*---------------------------------------------------------*/
4756   len_s  = merge->len_s;
4757 
4758   len = 0;  /* length of buf_si[] */
4759   merge->nsend = 0;
4760   for (proc=0; proc<size; proc++) {
4761     len_si[proc] = 0;
4762     if (proc == rank) {
4763       len_s[proc] = 0;
4764     } else {
4765       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4766       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4767     }
4768     if (len_s[proc]) {
4769       merge->nsend++;
4770       nrows = 0;
4771       for (i=owners[proc]; i<owners[proc+1]; i++) {
4772         if (ai[i+1] > ai[i]) nrows++;
4773       }
4774       len_si[proc] = 2*(nrows+1);
4775       len += len_si[proc];
4776     }
4777   }
4778 
4779   /* determine the number and length of messages to receive for ij-structure */
4780   /*-------------------------------------------------------------------------*/
4781   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4782   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4783 
4784   /* post the Irecv of j-structure */
4785   /*-------------------------------*/
4786   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4787   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4788 
4789   /* post the Isend of j-structure */
4790   /*--------------------------------*/
4791   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4792 
4793   for (proc=0, k=0; proc<size; proc++) {
4794     if (!len_s[proc]) continue;
4795     i = owners[proc];
4796     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4797     k++;
4798   }
4799 
4800   /* receives and sends of j-structure are complete */
4801   /*------------------------------------------------*/
4802   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4803   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4804 
4805   /* send and recv i-structure */
4806   /*---------------------------*/
4807   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4808   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4809 
4810   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4811   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4812   for (proc=0,k=0; proc<size; proc++) {
4813     if (!len_s[proc]) continue;
4814     /* form outgoing message for i-structure:
4815          buf_si[0]:                 nrows to be sent
4816                [1:nrows]:           row index (global)
4817                [nrows+1:2*nrows+1]: i-structure index
4818     */
4819     /*-------------------------------------------*/
4820     nrows = len_si[proc]/2 - 1;
4821     buf_si_i    = buf_si + nrows+1;
4822     buf_si[0]   = nrows;
4823     buf_si_i[0] = 0;
4824     nrows = 0;
4825     for (i=owners[proc]; i<owners[proc+1]; i++) {
4826       anzi = ai[i+1] - ai[i];
4827       if (anzi) {
4828         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4829         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4830         nrows++;
4831       }
4832     }
4833     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4834     k++;
4835     buf_si += len_si[proc];
4836   }
4837 
4838   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4839   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4840 
4841   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4842   for (i=0; i<merge->nrecv; i++) {
4843     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4844   }
4845 
4846   ierr = PetscFree(len_si);CHKERRQ(ierr);
4847   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4848   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4849   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4850   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4851   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4852   ierr = PetscFree(status);CHKERRQ(ierr);
4853 
4854   /* compute a local seq matrix in each processor */
4855   /*----------------------------------------------*/
4856   /* allocate bi array and free space for accumulating nonzero column info */
4857   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4858   bi[0] = 0;
4859 
4860   /* create and initialize a linked list */
4861   nlnk = N+1;
4862   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4863 
4864   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4865   len  = ai[owners[rank+1]] - ai[owners[rank]];
4866   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4867   current_space = free_space;
4868 
4869   /* determine symbolic info for each local row */
4870   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4871 
4872   for (k=0; k<merge->nrecv; k++) {
4873     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4874     nrows = *buf_ri_k[k];
4875     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of the k-th received i-structure */
4876     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
4877   }
4878 
4879   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4880   len = 0;
4881   for (i=0;i<m;i++) {
4882     bnzi   = 0;
4883     /* add local non-zero cols of this proc's seqmat into lnk */
4884     arow   = owners[rank] + i;
4885     anzi   = ai[arow+1] - ai[arow];
4886     aj     = a->j + ai[arow];
4887     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4888     bnzi += nlnk;
4889     /* add received col data into lnk */
4890     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4891       if (i == *nextrow[k]) { /* i-th row */
4892         anzi = *(nextai[k]+1) - *nextai[k];
4893         aj   = buf_rj[k] + *nextai[k];
4894         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4895         bnzi += nlnk;
4896         nextrow[k]++; nextai[k]++;
4897       }
4898     }
4899     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4900 
4901     /* if free space is not available, make more free space */
4902     if (current_space->local_remaining<bnzi) {
4903       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4904       nspacedouble++;
4905     }
4906     /* copy data into free space, then initialize lnk */
4907     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4908     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4909 
4910     current_space->array           += bnzi;
4911     current_space->local_used      += bnzi;
4912     current_space->local_remaining -= bnzi;
4913 
4914     bi[i+1] = bi[i] + bnzi;
4915   }
4916 
4917   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4918 
4919   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4920   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4921   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4922 
4923   /* create symbolic parallel matrix B_mpi */
4924   /*---------------------------------------*/
4925   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4926   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4927   if (n==PETSC_DECIDE) {
4928     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4929   } else {
4930     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4931   }
4932   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4933   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4934   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4935   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4936   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4937 
4938   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4939   B_mpi->assembled     = PETSC_FALSE;
4940   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4941   merge->bi            = bi;
4942   merge->bj            = bj;
4943   merge->buf_ri        = buf_ri;
4944   merge->buf_rj        = buf_rj;
4945   merge->coi           = PETSC_NULL;
4946   merge->coj           = PETSC_NULL;
4947   merge->owners_co     = PETSC_NULL;
4948 
4949   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4950 
4951   /* attach the supporting struct to B_mpi for reuse */
4952   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4953   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4954   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4955   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4956   *mpimat = B_mpi;
4957 
4958   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4959   PetscFunctionReturn(0);
4960 }
4961 
4962 #undef __FUNCT__
4963 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4964 /*@C
4965       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential
4966                  matrices from each processor
4967 
4968     Collective on MPI_Comm
4969 
4970    Input Parameters:
4971 +    comm - the communicator the parallel matrix will live on
4972 .    seqmat - the input sequential matrix (one per process)
4973 .    m - number of local rows (or PETSC_DECIDE)
4974 .    n - number of local columns (or PETSC_DECIDE)
4975 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4976 
4977    Output Parameter:
4978 .    mpimat - the parallel matrix generated
4979 
4980     Level: advanced
4981 
4982    Notes:
4983      The dimensions of the sequential matrix in each processor MUST be the same.
4984      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4985      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
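
     A minimal usage sketch, reusing the symbolic pattern on a second call (seqmat must keep
     the same nonzero structure between calls; error checking omitted):

.vb
   Mat C;
   MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
   /* ... update the numerical values in seqmat ... */
   MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
.ve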
4986 @*/
4987 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4988 {
4989   PetscErrorCode   ierr;
4990   PetscMPIInt     size;
4991 
4992   PetscFunctionBegin;
4993   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4994   if (size == 1) {
4995      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4996      if (scall == MAT_INITIAL_MATRIX) {
4997        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4998      } else {
4999        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5000      }
5001      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5002      PetscFunctionReturn(0);
5003   }
5004   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5005   if (scall == MAT_INITIAL_MATRIX) {
5006     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5007   }
5008   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5009   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5010   PetscFunctionReturn(0);
5011 }
5012 
5013 #undef __FUNCT__
5014 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5015 /*@
5016      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all of its local rows and putting them into a sequential matrix with
5017           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5018           with MatGetSize()
5019 
5020     Not Collective
5021 
5022    Input Parameters:
5023 +    A - the matrix
5024 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5025 
5026    Output Parameter:
5027 .    A_loc - the local sequential matrix generated
5028 
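   A minimal usage sketch (A is an assembled MPIAIJ matrix; error checking omitted):

.vb
   Mat A_loc;
   MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
   /* ... use A_loc ... */
   MatDestroy(&A_loc);
.ve
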
5029     Level: developer
5030 
5031 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5032 
5033 @*/
5034 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5035 {
5036   PetscErrorCode  ierr;
5037   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
5038   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5039   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5040   MatScalar       *aa=a->a,*ba=b->a,*cam;
5041   PetscScalar     *ca;
5042   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5043   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
5044   PetscBool       match;
5045 
5046   PetscFunctionBegin;
5047   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5048   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5049   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5050   if (scall == MAT_INITIAL_MATRIX) {
5051     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5052     ci[0] = 0;
5053     for (i=0; i<am; i++) {
5054       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5055     }
5056     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5057     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5058     k = 0;
5059     for (i=0; i<am; i++) {
5060       ncols_o = bi[i+1] - bi[i];
5061       ncols_d = ai[i+1] - ai[i];
5062       /* off-diagonal portion of A */
5063       for (jo=0; jo<ncols_o; jo++) {
5064         col = cmap[*bj];
5065         if (col >= cstart) break;
5066         cj[k]   = col; bj++;
5067         ca[k++] = *ba++;
5068       }
5069       /* diagonal portion of A */
5070       for (j=0; j<ncols_d; j++) {
5071         cj[k]   = cstart + *aj++;
5072         ca[k++] = *aa++;
5073       }
5074       /* off-diagonal portion of A */
5075       for (j=jo; j<ncols_o; j++) {
5076         cj[k]   = cmap[*bj++];
5077         ca[k++] = *ba++;
5078       }
5079     }
5080     /* put together the new matrix */
5081     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5082     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5083     /* Since these are PETSc arrays, change flags to free them as necessary. */
5084     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5085     mat->free_a  = PETSC_TRUE;
5086     mat->free_ij = PETSC_TRUE;
5087     mat->nonew   = 0;
5088   } else if (scall == MAT_REUSE_MATRIX) {
5089     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5090     ci = mat->i; cj = mat->j; cam = mat->a;
5091     for (i=0; i<am; i++) {
5092       /* off-diagonal portion of A */
5093       ncols_o = bi[i+1] - bi[i];
5094       for (jo=0; jo<ncols_o; jo++) {
5095         col = cmap[*bj];
5096         if (col >= cstart) break;
5097         *cam++ = *ba++; bj++;
5098       }
5099       /* diagonal portion of A */
5100       ncols_d = ai[i+1] - ai[i];
5101       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5102       /* off-diagonal portion of A */
5103       for (j=jo; j<ncols_o; j++) {
5104         *cam++ = *ba++; bj++;
5105       }
5106     }
5107   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5108   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5109   PetscFunctionReturn(0);
5110 }
5111 
5112 #undef __FUNCT__
5113 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5114 /*@C
5115      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5116 
5117     Not Collective
5118 
5119    Input Parameters:
5120 +    A - the matrix
5121 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5122 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5123 
5124    Output Parameter:
5125 .    A_loc - the local sequential matrix generated
5126 
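   A minimal usage sketch (A is an assembled MPIAIJ matrix; passing PETSC_NULL for row
   and col selects all local rows and all nonzero columns; error checking omitted):

.vb
   Mat A_loc;
   MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
   /* ... use A_loc ... */
   MatDestroy(&A_loc);
.ve
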
5127     Level: developer
5128 
5129 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5130 
5131 @*/
5132 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5133 {
5134   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5135   PetscErrorCode    ierr;
5136   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5137   IS                isrowa,iscola;
5138   Mat               *aloc;
5139   PetscBool       match;
5140 
5141   PetscFunctionBegin;
5142   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5143   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5144   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5145   if (!row) {
5146     start = A->rmap->rstart; end = A->rmap->rend;
5147     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5148   } else {
5149     isrowa = *row;
5150   }
5151   if (!col) {
5152     start = A->cmap->rstart;
5153     cmap  = a->garray;
5154     nzA   = a->A->cmap->n;
5155     nzB   = a->B->cmap->n;
5156     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5157     ncols = 0;
5158     for (i=0; i<nzB; i++) {
5159       if (cmap[i] < start) idx[ncols++] = cmap[i];
5160       else break;
5161     }
5162     imark = i;
5163     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5164     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5165     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5166   } else {
5167     iscola = *col;
5168   }
5169   if (scall != MAT_INITIAL_MATRIX) {
5170     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5171     aloc[0] = *A_loc;
5172   }
5173   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5174   *A_loc = aloc[0];
5175   ierr = PetscFree(aloc);CHKERRQ(ierr);
5176   if (!row) {
5177     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5178   }
5179   if (!col) {
5180     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5181   }
5182   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5183   PetscFunctionReturn(0);
5184 }
5185 
5186 #undef __FUNCT__
5187 #define __FUNCT__ "MatGetBrowsOfAcols"
5188 /*@C
5189     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5190 
5191     Collective on Mat
5192 
5193    Input Parameters:
5194 +    A,B - the matrices in mpiaij format
5195 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5196 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5197 
5198    Output Parameter:
5199 +    rowb, colb - index sets of rows and columns of B to extract
5200 -    B_seq - the sequential matrix generated
5201 
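   A minimal usage sketch (A and B are assembled MPIAIJ matrices; the index sets created
   on the first call are passed back in on reuse; error checking omitted):

.vb
   IS  rowb,colb;
   Mat B_seq;
   MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
   /* ... B receives new values with the same nonzero pattern ... */
   MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);
   ISDestroy(&rowb); ISDestroy(&colb); MatDestroy(&B_seq);
.ve
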
5202     Level: developer
5203 
5204 @*/
5205 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5206 {
5207   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5208   PetscErrorCode    ierr;
5209   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5210   IS                isrowb,iscolb;
5211   Mat               *bseq=PETSC_NULL;
5212 
5213   PetscFunctionBegin;
5214   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5215     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5216   }
5217   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5218 
5219   if (scall == MAT_INITIAL_MATRIX) {
5220     start = A->cmap->rstart;
5221     cmap  = a->garray;
5222     nzA   = a->A->cmap->n;
5223     nzB   = a->B->cmap->n;
5224     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5225     ncols = 0;
5226     for (i=0; i<nzB; i++) {  /* row < local row index */
5227       if (cmap[i] < start) idx[ncols++] = cmap[i];
5228       else break;
5229     }
5230     imark = i;
5231     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* locally owned rows of B */
5232     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* global rows of B above the locally owned block */
5233     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5234     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5235   } else {
5236     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5237     isrowb = *rowb; iscolb = *colb;
5238     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5239     bseq[0] = *B_seq;
5240   }
5241   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5242   *B_seq = bseq[0];
5243   ierr = PetscFree(bseq);CHKERRQ(ierr);
5244   if (!rowb) {
5245     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5246   } else {
5247     *rowb = isrowb;
5248   }
5249   if (!colb) {
5250     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5251   } else {
5252     *colb = iscolb;
5253   }
5254   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5255   PetscFunctionReturn(0);
5256 }
5257 
5258 #undef __FUNCT__
5259 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5260 /*
5261     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5262     of the OFF-DIAGONAL portion of the local A
5263 
5264     Collective on Mat
5265 
5266    Input Parameters:
5267 +    A,B - the matrices in mpiaij format
5268 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5269 
5270    Output Parameters:
5271 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5272 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5273 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5274 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5275 
5276     Level: developer
5277 
5278 */
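/*
   Illustrative calling sequence (a sketch, not taken from this file): on the first call
   the routine allocates the communication metadata and send buffer, which the caller
   hands back unchanged when only the numerical values of B have changed.

      Mat       B_oth;
      PetscInt  *startsj_s = PETSC_NULL,*startsj_r = PETSC_NULL;
      MatScalar *bufa = PETSC_NULL;

      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ... B gets new numerical values ...
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);

      ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
      ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
*/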
5279 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5280 {
5281   VecScatter_MPI_General *gen_to,*gen_from;
5282   PetscErrorCode         ierr;
5283   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5284   Mat_SeqAIJ             *b_oth;
5285   VecScatter             ctx=a->Mvctx;
5286   MPI_Comm               comm=((PetscObject)ctx)->comm;
5287   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5288   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5289   PetscScalar            *rvalues,*svalues;
5290   MatScalar              *b_otha,*bufa,*bufA;
5291   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5292   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5293   MPI_Status             *sstatus,rstatus;
5294   PetscMPIInt            jj;
5295   PetscInt               *cols,sbs,rbs;
5296   PetscScalar            *vals;
5297 
5298   PetscFunctionBegin;
5299   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5300     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5301   }
5302   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5303   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5304 
5305   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5306   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5307   rvalues  = gen_from->values; /* holds the length of receiving row */
5308   svalues  = gen_to->values;   /* holds the length of sending row */
5309   nrecvs   = gen_from->n;
5310   nsends   = gen_to->n;
5311 
5312   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5313   srow     = gen_to->indices;   /* local row index to be sent */
5314   sstarts  = gen_to->starts;
5315   sprocs   = gen_to->procs;
5316   sstatus  = gen_to->sstatus;
5317   sbs      = gen_to->bs;
5318   rstarts  = gen_from->starts;
5319   rprocs   = gen_from->procs;
5320   rbs      = gen_from->bs;
5321 
5322   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5323   if (scall == MAT_INITIAL_MATRIX) {
5324     /* i-array */
5325     /*---------*/
5326     /*  post receives */
5327     for (i=0; i<nrecvs; i++) {
5328       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5329       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5330       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5331     }
5332 
5333     /* pack the outgoing message */
5334     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5335     sstartsj[0] = 0;  rstartsj[0] = 0;
5336     len = 0; /* total length of j or a array to be sent */
5337     k = 0;
5338     for (i=0; i<nsends; i++) {
5339       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5340       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5341       for (j=0; j<nrows; j++) {
5342         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5343         for (l=0; l<sbs; l++) {
5344           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5345           rowlen[j*sbs+l] = ncols;
5346           len += ncols;
5347           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5348         }
5349         k++;
5350       }
5351       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5352       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5353     }
5354     /* recvs and sends of i-array are completed */
5355     i = nrecvs;
5356     while (i--) {
5357       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5358     }
5359     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5360 
5361     /* allocate buffers for sending j and a arrays */
5362     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5363     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5364 
5365     /* create i-array of B_oth */
5366     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5367     b_othi[0] = 0;
5368     len = 0; /* total length of j or a array to be received */
5369     k = 0;
5370     for (i=0; i<nrecvs; i++) {
5371       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5372       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5373       for (j=0; j<nrows; j++) {
5374         b_othi[k+1] = b_othi[k] + rowlen[j];
5375         len += rowlen[j]; k++;
5376       }
5377       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5378     }
5379 
5380     /* allocate space for the j and a arrays of B_oth */
5381     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5382     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5383 
5384     /* j-array */
5385     /*---------*/
5386     /*  post receives of j-array */
5387     for (i=0; i<nrecvs; i++) {
5388       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5389       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5390     }
5391 
5392     /* pack the outgoing message j-array */
5393     k = 0;
5394     for (i=0; i<nsends; i++) {
5395       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5396       bufJ = bufj+sstartsj[i];
5397       for (j=0; j<nrows; j++) {
5398         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5399         for (ll=0; ll<sbs; ll++) {
5400           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5401           for (l=0; l<ncols; l++) {
5402             *bufJ++ = cols[l];
5403           }
5404           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5405         }
5406       }
5407       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5408     }
5409 
5410     /* recvs and sends of j-array are completed */
5411     i = nrecvs;
5412     while (i--) {
5413       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5414     }
5415     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5416   } else if (scall == MAT_REUSE_MATRIX) {
5417     sstartsj = *startsj_s;
5418     rstartsj = *startsj_r;
5419     bufa     = *bufa_ptr;
5420     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5421     b_otha   = b_oth->a;
5422   } else {
5423     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5424   }
5425 
5426   /* a-array */
5427   /*---------*/
5428   /*  post receives of a-array */
5429   for (i=0; i<nrecvs; i++) {
5430     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5431     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5432   }
5433 
5434   /* pack the outgoing message a-array */
5435   k = 0;
5436   for (i=0; i<nsends; i++) {
5437     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5438     bufA = bufa+sstartsj[i];
5439     for (j=0; j<nrows; j++) {
5440       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5441       for (ll=0; ll<sbs; ll++) {
5442         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5443         for (l=0; l<ncols; l++) {
5444           *bufA++ = vals[l];
5445         }
5446         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5447       }
5448     }
5449     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5450   }
5451   /* recvs and sends of a-array are completed */
5452   i = nrecvs;
5453   while (i--) {
5454     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5455   }
5456   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5457   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5458 
5459   if (scall == MAT_INITIAL_MATRIX) {
5460     /* put together the new matrix */
5461     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5462 
5463     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5464     /* Since these are PETSc arrays, change flags to free them as necessary. */
5465     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5466     b_oth->free_a  = PETSC_TRUE;
5467     b_oth->free_ij = PETSC_TRUE;
5468     b_oth->nonew   = 0;
5469 
5470     ierr = PetscFree(bufj);CHKERRQ(ierr);
5471     if (!startsj_s || !bufa_ptr) {
5472       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5473       ierr = PetscFree(bufa);CHKERRQ(ierr);
5474     } else {
5475       *startsj_s = sstartsj;
5476       *startsj_r = rstartsj;
5477       *bufa_ptr  = bufa;
5478     }
5479   }
5480   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5481   PetscFunctionReturn(0);
5482 }
5483 
5484 #undef __FUNCT__
5485 #define __FUNCT__ "MatGetCommunicationStructs"
5486 /*@C
5487   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5488 
5489   Not Collective
5490 
5491   Input Parameter:
5492 . A - The matrix in mpiaij format
5493 
5494   Output Parameters:
5495 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5496 . colmap - A map from global column index to local index into lvec
5497 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5498 
5499   Level: developer
5500 
5501 @*/
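/*
   Illustrative use (a sketch; x here stands for the input vector of a product y = A*x and
   is not defined in this file): the returned objects are references into the Mat_MPIAIJ
   data and must not be destroyed by the caller.

      Vec        lvec;
      VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
      PetscTable colmap;
   #else
      PetscInt   *colmap;
   #endif

      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
      ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
*/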
5502 #if defined (PETSC_USE_CTABLE)
5503 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5504 #else
5505 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5506 #endif
5507 {
5508   Mat_MPIAIJ *a;
5509 
5510   PetscFunctionBegin;
5511   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5512   PetscValidPointer(lvec, 2);
5513   PetscValidPointer(colmap, 3);
5514   PetscValidPointer(multScatter, 4);
5515   a = (Mat_MPIAIJ *) A->data;
5516   if (lvec) *lvec = a->lvec;
5517   if (colmap) *colmap = a->colmap;
5518   if (multScatter) *multScatter = a->Mvctx;
5519   PetscFunctionReturn(0);
5520 }
5521 
5522 EXTERN_C_BEGIN
5523 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5524 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5525 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5526 EXTERN_C_END
5527 
5528 #undef __FUNCT__
5529 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5530 /*
5531     Computes C = (B'*A')' since there is no direct kernel for forming the MPIDense*MPIAIJ product A*B
5532 
5533                n                       p                          p
5534         (              )       (              )         (                  )
5535       m (      A       )  *  n (       B      )   =   m (         C        )
5536         (              )       (              )         (                  )
5537 
5538 */
5539 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5540 {
5541   PetscErrorCode     ierr;
5542   Mat                At,Bt,Ct;
5543 
5544   PetscFunctionBegin;
5545   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5546   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5547   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5548   ierr = MatDestroy(&At);CHKERRQ(ierr);
5549   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5550   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5551   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5552   PetscFunctionReturn(0);
5553 }
5554 
5555 #undef __FUNCT__
5556 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5557 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5558 {
5559   PetscErrorCode ierr;
5560   PetscInt       m=A->rmap->n,n=B->cmap->n;
5561   Mat            Cmat;
5562 
5563   PetscFunctionBegin;
5564   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5565   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5566   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5567   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5568   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5569   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5570   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5571   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5572 
5573   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5574   *C = Cmat;
5575   PetscFunctionReturn(0);
5576 }
5577 
5578 /* ----------------------------------------------------------------*/
5579 #undef __FUNCT__
5580 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5581 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5582 {
5583   PetscErrorCode ierr;
5584 
5585   PetscFunctionBegin;
5586   if (scall == MAT_INITIAL_MATRIX) {
5587     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5588   }
5589   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5590   PetscFunctionReturn(0);
5591 }
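/*
   How this path is typically reached (an illustrative sketch): the three routines above are
   composed on every MPIAIJ matrix in MatCreate_MPIAIJ() below, so an ordinary MatMatMult()
   call with an MPIDENSE A and an MPIAIJ B can dispatch here.

      Mat C;
      ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ...
      ierr = MatDestroy(&C);CHKERRQ(ierr);
*/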
5592 
5593 EXTERN_C_BEGIN
5594 #if defined(PETSC_HAVE_MUMPS)
5595 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5596 #endif
5597 #if defined(PETSC_HAVE_PASTIX)
5598 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5599 #endif
5600 #if defined(PETSC_HAVE_SUPERLU_DIST)
5601 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5602 #endif
5603 #if defined(PETSC_HAVE_CLIQUE)
5604 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5605 #endif
5606 EXTERN_C_END
5607 
5608 /*MC
5609    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5610 
5611    Options Database Keys:
5612 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5613 
5614   Level: beginner
5615 
5616 .seealso: MatCreateAIJ()
5617 M*/
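/*
   A minimal creation sketch for this type (illustrative; the global sizes M and N and the
   preallocation counts are placeholders):

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
      ... MatSetValues() ...
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/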
5618 
5619 EXTERN_C_BEGIN
5620 #undef __FUNCT__
5621 #define __FUNCT__ "MatCreate_MPIAIJ"
5622 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5623 {
5624   Mat_MPIAIJ     *b;
5625   PetscErrorCode ierr;
5626   PetscMPIInt    size;
5627 
5628   PetscFunctionBegin;
5629   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5630   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5631   B->data         = (void*)b;
5632   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5633   B->assembled    = PETSC_FALSE;
5634   B->insertmode   = NOT_SET_VALUES;
5635   b->size         = size;
5636   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5637 
5638   /* build cache for off array entries formed */
5639   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5640   b->donotstash  = PETSC_FALSE;
5641   b->colmap      = 0;
5642   b->garray      = 0;
5643   b->roworiented = PETSC_TRUE;
5644 
5645   /* stuff used for matrix vector multiply */
5646   b->lvec      = PETSC_NULL;
5647   b->Mvctx     = PETSC_NULL;
5648 
5649   /* stuff for MatGetRow() */
5650   b->rowindices   = 0;
5651   b->rowvalues    = 0;
5652   b->getrowactive = PETSC_FALSE;
5653 
5654   /* flexible pointer used in CUSP/CUSPARSE classes */
5655   b->spptr        = PETSC_NULL;
5656 
5657 #if defined(PETSC_HAVE_MUMPS)
5658   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5659                                      "MatGetFactor_aij_mumps",
5660                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5661 #endif
5662 #if defined(PETSC_HAVE_PASTIX)
5663   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5664                                            "MatGetFactor_mpiaij_pastix",
5665                                            MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5666 #endif
5667 #if defined(PETSC_HAVE_SUPERLU_DIST)
5668   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5669                                      "MatGetFactor_mpiaij_superlu_dist",
5670                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5671 #endif
5672 #if defined(PETSC_HAVE_CLIQUE)
5673   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C",
5674                                      "MatGetFactor_aij_clique",
5675                                      MatGetFactor_aij_clique);CHKERRQ(ierr);
5676 #endif
5677   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5678                                      "MatStoreValues_MPIAIJ",
5679                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5680   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5681                                      "MatRetrieveValues_MPIAIJ",
5682                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5683   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5684                                      "MatGetDiagonalBlock_MPIAIJ",
5685                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5686   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5687                                      "MatIsTranspose_MPIAIJ",
5688                                      MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5689   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5690                                      "MatMPIAIJSetPreallocation_MPIAIJ",
5691                                      MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5692   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5693                                      "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5694                                      MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5695   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5696                                      "MatDiagonalScaleLocal_MPIAIJ",
5697                                      MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5698   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5699                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5700                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5701   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5702                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5703                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5704   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5705                                      "MatConvert_MPIAIJ_MPISBAIJ",
5706                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5707   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5708                                      "MatMatMult_MPIDense_MPIAIJ",
5709                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5710   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5711                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5712                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5713   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5714                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5715                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5716   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5717   PetscFunctionReturn(0);
5718 }
5719 EXTERN_C_END
5720 
5721 #undef __FUNCT__
5722 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5723 /*@
5724      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5725          and "off-diagonal" part of the matrix in CSR format.
5726 
5727    Collective on MPI_Comm
5728 
5729    Input Parameters:
5730 +  comm - MPI communicator
5731 .  m - number of local rows (Cannot be PETSC_DECIDE)
5732 .  n - This value should be the same as the local size used in creating the
5733        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5734        calculated if N is given). For square matrices n is almost always m.
5735 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5736 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5737 .   i - row indices for "diagonal" portion of matrix
5738 .   j - column indices
5739 .   a - matrix values
5740 .   oi - row indices for "off-diagonal" portion of matrix
5741 .   oj - column indices
5742 -   oa - matrix values
5743 
5744    Output Parameter:
5745 .   mat - the matrix
5746 
5747    Level: advanced
5748 
5749    Notes:
5750        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5751        must free the arrays once the matrix has been destroyed, and not before.
5752 
5753        The i and j indices are 0 based
5754 
5755        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5756 
5757        This sets local rows and cannot be used to set off-processor values.
5758 
5759        You cannot later use MatSetValues() to change values in this matrix.
5760 
5761 .keywords: matrix, aij, compressed row, sparse, parallel
5762 
5763 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5764           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5765 @*/
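/*
   A single-process sketch (illustrative only; with one process the off-diagonal block is
   empty, so oj and oa are never dereferenced and their contents do not matter):

      PetscInt    i[]  = {0,1,2},  j[]  = {0,1};
      PetscScalar a[]  = {1.0,2.0};
      PetscInt    oi[] = {0,0,0},  oj[] = {0};
      PetscScalar oa[] = {0.0};
      Mat         A;

      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_SELF,2,2,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
      ... use A; the arrays above must remain valid until A is destroyed ...
      ierr = MatDestroy(&A);CHKERRQ(ierr);
*/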
5766 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5767                                                PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5768 {
5769   PetscErrorCode ierr;
5770   Mat_MPIAIJ     *maij;
5771 
5772   PetscFunctionBegin;
5773   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5774   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5775   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5776   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5777   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5778   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5779   maij = (Mat_MPIAIJ*) (*mat)->data;
5780   maij->donotstash     = PETSC_TRUE;
5781   (*mat)->preallocated = PETSC_TRUE;
5782 
5783   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5784   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5785 
5786   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5787   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5788 
5789   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5790   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5791   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5792   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5793 
5794   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5795   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5796   PetscFunctionReturn(0);
5797 }
5798 
5799 /*
5800     Special version for direct calls from Fortran
5801 */
5802 #include <petsc-private/fortranimpl.h>
5803 
5804 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5805 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5806 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5807 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5808 #endif
5809 
5810 /* Change these macros so can be used in void function */
5811 #undef CHKERRQ
5812 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5813 #undef SETERRQ2
5814 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5815 #undef SETERRQ3
5816 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5817 #undef SETERRQ
5818 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5819 
5820 EXTERN_C_BEGIN
5821 #undef __FUNCT__
5822 #define __FUNCT__ "matsetvaluesmpiaij_"
5823 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5824 {
5825   Mat             mat = *mmat;
5826   PetscInt        m = *mm, n = *mn;
5827   InsertMode      addv = *maddv;
5828   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5829   PetscScalar     value;
5830   PetscErrorCode  ierr;
5831 
5832   MatCheckPreallocated(mat,1);
5833   if (mat->insertmode == NOT_SET_VALUES) {
5834     mat->insertmode = addv;
5835   }
5836 #if defined(PETSC_USE_DEBUG)
5837   else if (mat->insertmode != addv) {
5838     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5839   }
5840 #endif
5841   {
5842   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5843   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5844   PetscBool       roworiented = aij->roworiented;
5845 
5846   /* Some Variables required in the macro */
5847   Mat             A = aij->A;
5848   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5849   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5850   MatScalar       *aa = a->a;
5851   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5852   Mat             B = aij->B;
5853   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5854   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5855   MatScalar       *ba = b->a;
5856 
5857   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5858   PetscInt        nonew = a->nonew;
5859   MatScalar       *ap1,*ap2;
5860 
5861   PetscFunctionBegin;
5862   for (i=0; i<m; i++) {
5863     if (im[i] < 0) continue;
5864 #if defined(PETSC_USE_DEBUG)
5865     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5866 #endif
5867     if (im[i] >= rstart && im[i] < rend) {
5868       row      = im[i] - rstart;
5869       lastcol1 = -1;
5870       rp1      = aj + ai[row];
5871       ap1      = aa + ai[row];
5872       rmax1    = aimax[row];
5873       nrow1    = ailen[row];
5874       low1     = 0;
5875       high1    = nrow1;
5876       lastcol2 = -1;
5877       rp2      = bj + bi[row];
5878       ap2      = ba + bi[row];
5879       rmax2    = bimax[row];
5880       nrow2    = bilen[row];
5881       low2     = 0;
5882       high2    = nrow2;
5883 
5884       for (j=0; j<n; j++) {
5885         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5886         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5887         if (in[j] >= cstart && in[j] < cend) {
5888           col = in[j] - cstart;
5889           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5890         } else if (in[j] < 0) continue;
5891 #if defined(PETSC_USE_DEBUG)
5892         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5893 #endif
5894         else {
5895           if (mat->was_assembled) {
5896             if (!aij->colmap) {
5897               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5898             }
5899 #if defined (PETSC_USE_CTABLE)
5900             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5901             col--;
5902 #else
5903             col = aij->colmap[in[j]] - 1;
5904 #endif
5905             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5906               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5907               col =  in[j];
5908               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5909               B = aij->B;
5910               b = (Mat_SeqAIJ*)B->data;
5911               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5912               rp2      = bj + bi[row];
5913               ap2      = ba + bi[row];
5914               rmax2    = bimax[row];
5915               nrow2    = bilen[row];
5916               low2     = 0;
5917               high2    = nrow2;
5918               bm       = aij->B->rmap->n;
5919               ba = b->a;
5920             }
5921           } else col = in[j];
5922           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5923         }
5924       }
5925     } else {
5926       if (!aij->donotstash) {
5927         if (roworiented) {
5928           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5929         } else {
5930           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5931         }
5932       }
5933     }
5934   }}
5935   PetscFunctionReturnVoid();
5936 }
5937 EXTERN_C_END
5938 
5939