xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 6f3c3dcf8ef4015f292691ee124e8c4bddb46dfd)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 
6 /*MC
7    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
8 
9    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
10    and MATMPIAIJ otherwise.  As a result, for single process communicators,
11   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
12   for communicators controlling multiple processes.  It is recommended that you call both of
13   the above preallocation routines for simplicity.
14 
15    Options Database Keys:
16 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
17 
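   Example usage (a minimal sketch; comm, the sizes m, n, M, N, and the preallocation counts nz, d_nz, o_nz
   are placeholders to be supplied by the caller; error checking omitted):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,nz,NULL);
   MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);
.ve
   followed by MatSetValues(), MatAssemblyBegin(), and MatAssemblyEnd().
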
18   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The AIJ type also
19    automatically switches over to use inodes when enough of them exist.
20 
21   Level: beginner
22 
23 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
24 M*/
25 
26 /*MC
27    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
28 
29    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
30    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
31    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
32   for communicators controlling multiple processes.  It is recommended that you call both of
33   the above preallocation routines for simplicity.
34 
35    Options Database Keys:
36 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
37 
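   Example usage (a minimal sketch; the arguments are placeholders supplied by the caller):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATAIJCRL);
   MatSeqAIJSetPreallocation(A,nz,NULL);
   MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);
.ve
   or create a MATAIJ matrix, call MatSetFromOptions(), and select this type at run time with -mat_type aijcrl.
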
38   Level: beginner
39 
40 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
41 M*/
42 
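/*
   Builds an index set of the global rows of M that contain at least one stored, nonzero value in either
   the diagonal (A) or off-diagonal (B) block; *keptrows is left NULL when no process has an empty or
   all-zero row, i.e. when every row would be kept.
*/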
43 #undef __FUNCT__
44 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
45 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
46 {
47   PetscErrorCode  ierr;
48   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
49   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
50   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
51   const PetscInt  *ia,*ib;
52   const MatScalar *aa,*bb;
53   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
54   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
55 
56   PetscFunctionBegin;
57   *keptrows = 0;
58   ia        = a->i;
59   ib        = b->i;
60   for (i=0; i<m; i++) {
61     na = ia[i+1] - ia[i];
62     nb = ib[i+1] - ib[i];
63     if (!na && !nb) {
64       cnt++;
65       goto ok1;
66     }
67     aa = a->a + ia[i];
68     for (j=0; j<na; j++) {
69       if (aa[j] != 0.0) goto ok1;
70     }
71     bb = b->a + ib[i];
72     for (j=0; j <nb; j++) {
73       if (bb[j] != 0.0) goto ok1;
74     }
75     cnt++;
76 ok1:;
77   }
78   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
79   if (!n0rows) PetscFunctionReturn(0);
80   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
81   cnt  = 0;
82   for (i=0; i<m; i++) {
83     na = ia[i+1] - ia[i];
84     nb = ib[i+1] - ib[i];
85     if (!na && !nb) continue;
86     aa = a->a + ia[i];
87     for (j=0; j<na;j++) {
88       if (aa[j] != 0.0) {
89         rows[cnt++] = rstart + i;
90         goto ok2;
91       }
92     }
93     bb = b->a + ib[i];
94     for (j=0; j<nb; j++) {
95       if (bb[j] != 0.0) {
96         rows[cnt++] = rstart + i;
97         goto ok2;
98       }
99     }
100 ok2:;
101   }
102   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
103   PetscFunctionReturn(0);
104 }
105 
106 #undef __FUNCT__
107 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
108 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
109 {
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
111   PetscErrorCode ierr;
112   PetscInt       i,rstart,nrows,*rows;
113 
114   PetscFunctionBegin;
115   *zrows = NULL;
116   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
117   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
118   for (i=0; i<nrows; i++) rows[i] += rstart;
119   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
120   PetscFunctionReturn(0);
121 }
122 
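/*
   Computes the requested norm of every global column: each process accumulates the contributions of its
   diagonal (A) and off-diagonal (B) blocks into a work array of length equal to the global number of
   columns, and the arrays are then combined with MPI_Allreduce() (summed for the 1- and 2-norms, maxed
   for the infinity norm). Because the work array has global length this is not scalable in the number
   of columns.
*/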
123 #undef __FUNCT__
124 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
125 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
126 {
127   PetscErrorCode ierr;
128   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
129   PetscInt       i,n,*garray = aij->garray;
130   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
131   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
132   PetscReal      *work;
133 
134   PetscFunctionBegin;
135   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
136   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
137   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
138   if (type == NORM_2) {
139     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141     }
142     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144     }
145   } else if (type == NORM_1) {
146     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148     }
149     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151     }
152   } else if (type == NORM_INFINITY) {
153     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155     }
156     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158     }
159 
160   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
161   if (type == NORM_INFINITY) {
162     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
163   } else {
164     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
165   }
166   ierr = PetscFree(work);CHKERRQ(ierr);
167   if (type == NORM_2) {
168     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatDistribute_MPIAIJ"
175 /*
176     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
177     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 
179     Only for square matrices
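
    Typical call (a sketch): every rank calls
       MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&newmat);
    where gmat is the complete sequential matrix (only significant on rank 0) and m is the number of rows
    this rank is to own in the distributed matrix.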
180 */
181 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
182 {
183   PetscMPIInt    rank,size;
184   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
185   PetscErrorCode ierr;
186   Mat            mat;
187   Mat_SeqAIJ     *gmata;
188   PetscMPIInt    tag;
189   MPI_Status     status;
190   PetscBool      aij;
191   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
192 
193   PetscFunctionBegin;
194   CHKMEMQ;
195   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
196   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
197   if (!rank) {
198     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
199     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
200   }
201   if (reuse == MAT_INITIAL_MATRIX) {
202     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
203     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
204     if (!rank) {
205       bses[0] = gmat->rmap->bs;
206       bses[1] = gmat->cmap->bs;
207     }
208     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
209     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
210     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
211     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
212     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
213     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
214 
215     rowners[0] = 0;
216     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
217     rstart = rowners[rank];
218     rend   = rowners[rank+1];
219     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
220     if (!rank) {
221       gmata = (Mat_SeqAIJ*) gmat->data;
222       /* send row lengths to all processors */
223       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
224       for (i=1; i<size; i++) {
225         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
226       }
227       /* determine the number of diagonal and off-diagonal nonzeros in each local row */
228       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
229       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
230       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
231       jj   = 0;
232       for (i=0; i<m; i++) {
233         for (j=0; j<dlens[i]; j++) {
234           if (gmata->j[jj] < rstart) ld[i]++;
235           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
236           jj++;
237         }
238       }
239       /* send column indices to other processes */
240       for (i=1; i<size; i++) {
241         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
242         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
243         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
244       }
245 
246       /* send numerical values to other processes */
247       for (i=1; i<size; i++) {
248         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
249         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
250       }
251       gmataa = gmata->a;
252       gmataj = gmata->j;
253 
254     } else {
255       /* receive row lengths */
256       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
257       /* receive column indices */
258       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
260       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       /* determine the number of diagonal and off-diagonal nonzeros in each local row */
262       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
263       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
264       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
265       jj   = 0;
266       for (i=0; i<m; i++) {
267         for (j=0; j<dlens[i]; j++) {
268           if (gmataj[jj] < rstart) ld[i]++;
269           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
270           jj++;
271         }
272       }
273       /* receive numerical values */
274       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
275       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
276     }
277     /* set preallocation */
278     for (i=0; i<m; i++) {
279       dlens[i] -= olens[i];
280     }
281     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
282     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
283 
284     for (i=0; i<m; i++) {
285       dlens[i] += olens[i];
286     }
287     cnt = 0;
288     for (i=0; i<m; i++) {
289       row  = rstart + i;
290       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
291       cnt += dlens[i];
292     }
293     if (rank) {
294       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
295     }
296     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
297     ierr = PetscFree(rowners);CHKERRQ(ierr);
298 
299     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
300 
301     *inmat = mat;
302   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
303     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
304     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
305     mat  = *inmat;
306     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
307     if (!rank) {
308       /* send numerical values to other processes */
309       gmata  = (Mat_SeqAIJ*) gmat->data;
310       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
311       gmataa = gmata->a;
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
315       }
316       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
317     } else {
318       /* receive numerical values from process 0*/
319       nz   = Ad->nz + Ao->nz;
320       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
321       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
322     }
323     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
324     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
325     ad = Ad->a;
326     ao = Ao->a;
327     if (mat->rmap->n) {
328       i  = 0;
329       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
330       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
331     }
332     for (i=1; i<mat->rmap->n; i++) {
333       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
334       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
335     }
336     i--;
337     if (mat->rmap->n) {
338       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
339     }
340     if (rank) {
341       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
342     }
343   }
344   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
345   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
346   CHKMEMQ;
347   PetscFunctionReturn(0);
348 }
349 
350 /*
351   Local utility routine that creates a mapping from the global column
352 number to the local number in the off-diagonal part of the local
353 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
354 a slightly higher hash table cost; without it, it is not scalable (each process
355 has an order N integer array) but access is fast.
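
  Lookup pattern (as used in MatSetValues_MPIAIJ() below): with PETSC_USE_CTABLE,
      PetscTableFind(aij->colmap,gcol+1,&lcol); lcol--;
  and otherwise
      lcol = aij->colmap[gcol] - 1;
  where gcol is a global column index; a negative result means the column is not present in the
  off-diagonal part.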
356 */
357 #undef __FUNCT__
358 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
359 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
360 {
361   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
362   PetscErrorCode ierr;
363   PetscInt       n = aij->B->cmap->n,i;
364 
365   PetscFunctionBegin;
366   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
367 #if defined(PETSC_USE_CTABLE)
368   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
369   for (i=0; i<n; i++) {
370     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
371   }
372 #else
373   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
374   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
375   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
376   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
377 #endif
378   PetscFunctionReturn(0);
379 }
380 
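/*
   The two macros below insert or add a single value into the diagonal (A) or off-diagonal (B) SeqAIJ block
   of the local matrix: they search the current row for the column (bisection until the range is small,
   then a linear scan), update the value in place if the column is already stored, and otherwise shift the
   trailing entries of the row up one slot to make room for the new nonzero, reallocating the row with
   MatSeqXAIJReallocateAIJ() when it is full.
*/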
381 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
382 { \
383     if (col <= lastcol1)  low1 = 0;     \
384     else                 high1 = nrow1; \
385     lastcol1 = col;\
386     while (high1-low1 > 5) { \
387       t = (low1+high1)/2; \
388       if (rp1[t] > col) high1 = t; \
389       else              low1  = t; \
390     } \
391       for (_i=low1; _i<high1; _i++) { \
392         if (rp1[_i] > col) break; \
393         if (rp1[_i] == col) { \
394           if (addv == ADD_VALUES) ap1[_i] += value;   \
395           else                    ap1[_i] = value; \
396           goto a_noinsert; \
397         } \
398       }  \
399       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
400       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
401       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
402       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
403       N = nrow1++ - 1; a->nz++; high1++; \
404       /* shift up all the later entries in this row */ \
405       for (ii=N; ii>=_i; ii--) { \
406         rp1[ii+1] = rp1[ii]; \
407         ap1[ii+1] = ap1[ii]; \
408       } \
409       rp1[_i] = col;  \
410       ap1[_i] = value;  \
411       a_noinsert: ; \
412       ailen[row] = nrow1; \
413 }
414 
415 
416 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
417   { \
418     if (col <= lastcol2) low2 = 0;                        \
419     else high2 = nrow2;                                   \
420     lastcol2 = col;                                       \
421     while (high2-low2 > 5) {                              \
422       t = (low2+high2)/2;                                 \
423       if (rp2[t] > col) high2 = t;                        \
424       else             low2  = t;                         \
425     }                                                     \
426     for (_i=low2; _i<high2; _i++) {                       \
427       if (rp2[_i] > col) break;                           \
428       if (rp2[_i] == col) {                               \
429         if (addv == ADD_VALUES) ap2[_i] += value;         \
430         else                    ap2[_i] = value;          \
431         goto b_noinsert;                                  \
432       }                                                   \
433     }                                                     \
434     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
435     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
436     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
437     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
438     N = nrow2++ - 1; b->nz++; high2++;                    \
439     /* shift up all the later entries in this row */      \
440     for (ii=N; ii>=_i; ii--) {                            \
441       rp2[ii+1] = rp2[ii];                                \
442       ap2[ii+1] = ap2[ii];                                \
443     }                                                     \
444     rp2[_i] = col;                                        \
445     ap2[_i] = value;                                      \
446     b_noinsert: ;                                         \
447     bilen[row] = nrow2;                                   \
448   }
449 
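/*
   Replaces the numerical values of one locally owned row. v[] must contain the values of every stored
   entry of the row ordered by global column: the leading entries that lie to the left of the diagonal
   block go into the off-diagonal (B) part, then the diagonal (A) part is filled, then the remaining
   entries go back into B. Only correct for square matrices, since the row ownership range is used as
   the column split.
*/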
450 #undef __FUNCT__
451 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
452 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
453 {
454   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
455   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
456   PetscErrorCode ierr;
457   PetscInt       l,*garray = mat->garray,diag;
458 
459   PetscFunctionBegin;
460   /* code only works for square matrices A */
461 
462   /* find size of row to the left of the diagonal part */
463   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
464   row  = row - diag;
465   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
466     if (garray[b->j[b->i[row]+l]] > diag) break;
467   }
468   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
469 
470   /* diagonal part */
471   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
472 
473   /* right of diagonal part */
474   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
475   PetscFunctionReturn(0);
476 }
477 
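/*
   Inserts or adds a logically dense block of values: entries whose row is owned by this process go
   directly into the diagonal (A) or off-diagonal (B) block through the macros above (creating the column
   map, or disassembling the matrix, when a new off-diagonal column appears after assembly); entries for
   rows owned by other processes are placed in the matrix stash and communicated during
   MatAssemblyBegin()/MatAssemblyEnd().
*/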
478 #undef __FUNCT__
479 #define __FUNCT__ "MatSetValues_MPIAIJ"
480 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
481 {
482   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
483   PetscScalar    value;
484   PetscErrorCode ierr;
485   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
486   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
487   PetscBool      roworiented = aij->roworiented;
488 
489   /* Some variables required by the macros above */
490   Mat        A                 = aij->A;
491   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
492   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
493   MatScalar  *aa               = a->a;
494   PetscBool  ignorezeroentries = a->ignorezeroentries;
495   Mat        B                 = aij->B;
496   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
497   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
498   MatScalar  *ba               = b->a;
499 
500   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
501   PetscInt  nonew;
502   MatScalar *ap1,*ap2;
503 
504   PetscFunctionBegin;
505   if (v) PetscValidScalarPointer(v,6);
506   for (i=0; i<m; i++) {
507     if (im[i] < 0) continue;
508 #if defined(PETSC_USE_DEBUG)
509     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
510 #endif
511     if (im[i] >= rstart && im[i] < rend) {
512       row      = im[i] - rstart;
513       lastcol1 = -1;
514       rp1      = aj + ai[row];
515       ap1      = aa + ai[row];
516       rmax1    = aimax[row];
517       nrow1    = ailen[row];
518       low1     = 0;
519       high1    = nrow1;
520       lastcol2 = -1;
521       rp2      = bj + bi[row];
522       ap2      = ba + bi[row];
523       rmax2    = bimax[row];
524       nrow2    = bilen[row];
525       low2     = 0;
526       high2    = nrow2;
527 
528       for (j=0; j<n; j++) {
529         if (v) {
530           if (roworiented) value = v[i*n+j];
531           else             value = v[i+j*m];
532         } else value = 0.0;
533         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
534         if (in[j] >= cstart && in[j] < cend) {
535           col   = in[j] - cstart;
536           nonew = a->nonew;
537           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
538         } else if (in[j] < 0) continue;
539 #if defined(PETSC_USE_DEBUG)
540         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
541 #endif
542         else {
543           if (mat->was_assembled) {
544             if (!aij->colmap) {
545               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
546             }
547 #if defined(PETSC_USE_CTABLE)
548             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
549             col--;
550 #else
551             col = aij->colmap[in[j]] - 1;
552 #endif
553             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
554               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
555               col  =  in[j];
556               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
557               B     = aij->B;
558               b     = (Mat_SeqAIJ*)B->data;
559               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
560               rp2   = bj + bi[row];
561               ap2   = ba + bi[row];
562               rmax2 = bimax[row];
563               nrow2 = bilen[row];
564               low2  = 0;
565               high2 = nrow2;
566               bm    = aij->B->rmap->n;
567               ba    = b->a;
568             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
569           } else col = in[j];
570           nonew = b->nonew;
571           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
572         }
573       }
574     } else {
575       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
576       if (!aij->donotstash) {
577         mat->assembled = PETSC_FALSE;
578         if (roworiented) {
579           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
580         } else {
581           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
582         }
583       }
584     }
585   }
586   PetscFunctionReturn(0);
587 }
588 
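/*
   Retrieves values from locally owned rows only: columns inside the local column range are read from the
   diagonal (A) block, all other columns are looked up through the column map into the off-diagonal (B)
   block, and entries not stored there are returned as 0.0. Requesting a row owned by another process
   raises an error.
*/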
589 #undef __FUNCT__
590 #define __FUNCT__ "MatGetValues_MPIAIJ"
591 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
592 {
593   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
594   PetscErrorCode ierr;
595   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
596   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
597 
598   PetscFunctionBegin;
599   for (i=0; i<m; i++) {
600     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
601     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
602     if (idxm[i] >= rstart && idxm[i] < rend) {
603       row = idxm[i] - rstart;
604       for (j=0; j<n; j++) {
605         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
606         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
607         if (idxn[j] >= cstart && idxn[j] < cend) {
608           col  = idxn[j] - cstart;
609           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
610         } else {
611           if (!aij->colmap) {
612             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
613           }
614 #if defined(PETSC_USE_CTABLE)
615           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
616           col--;
617 #else
618           col = aij->colmap[idxn[j]] - 1;
619 #endif
620           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
621           else {
622             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
623           }
624         }
625       }
626     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
627   }
628   PetscFunctionReturn(0);
629 }
630 
631 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
632 
633 #undef __FUNCT__
634 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
635 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
636 {
637   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
638   PetscErrorCode ierr;
639   PetscInt       nstash,reallocs;
640   InsertMode     addv;
641 
642   PetscFunctionBegin;
643   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
644 
645   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
646   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
647   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
648   mat->insertmode = addv; /* in case this processor had no cache */
649 
650   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
651   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
652   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
653   PetscFunctionReturn(0);
654 }
655 
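/*
   Completes the assembly: values stashed on other processes for locally owned rows are received and
   inserted with MatSetValues_MPIAIJ(), the diagonal (A) and off-diagonal (B) blocks are assembled, and on
   the first final assembly the scatter needed by MatMult() is created by MatSetUpMultiply_MPIAIJ(). If any
   process had to disassemble, the remaining processes disassemble as well so that all can reassemble
   consistently.
*/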
656 #undef __FUNCT__
657 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
658 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
659 {
660   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
661   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
662   PetscErrorCode ierr;
663   PetscMPIInt    n;
664   PetscInt       i,j,rstart,ncols,flg;
665   PetscInt       *row,*col;
666   PetscBool      other_disassembled;
667   PetscScalar    *val;
668   InsertMode     addv = mat->insertmode;
669 
670   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
671 
672   PetscFunctionBegin;
673   if (!aij->donotstash && !mat->nooffprocentries) {
674     while (1) {
675       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
676       if (!flg) break;
677 
678       for (i=0; i<n; ) {
679         /* Now identify the consecutive vals belonging to the same row */
680         for (j=i,rstart=row[j]; j<n; j++) {
681           if (row[j] != rstart) break;
682         }
683         if (j < n) ncols = j-i;
684         else       ncols = n-i;
685         /* Now assemble all these values with a single function call */
686         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
687 
688         i = j;
689       }
690     }
691     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
692   }
693   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
694   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
695 
696   /* determine if any processor has disassembled, if so we must
697      also disassemble ourselves, in order that we may reassemble. */
698   /*
699      if the nonzero structure of submatrix B cannot change then we know that
700      no processor disassembled, thus we can skip this step
701   */
702   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
703     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
704     if (mat->was_assembled && !other_disassembled) {
705       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
706     }
707   }
708   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
709     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
710   }
711   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
712   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
713   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
714   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
715 
716   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
717 
718   aij->rowvalues = 0;
719 
720   /* used by MatAXPY() */
721   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
722   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
723 
724   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
725   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
726   PetscFunctionReturn(0);
727 }
728 
729 #undef __FUNCT__
730 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
731 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
732 {
733   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
734   PetscErrorCode ierr;
735 
736   PetscFunctionBegin;
737   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
738   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
739   PetscFunctionReturn(0);
740 }
741 
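/*
   Zeroes the given global rows: the row indices are first routed to their owning processes with a round of
   nonblocking point-to-point messages, then each process zeroes its local rows (placing 'diag' on the
   diagonal when requested) and, if x and b are provided, fixes the right-hand side so that the zeroed
   rows satisfy b = diag*x.
*/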
742 #undef __FUNCT__
743 #define __FUNCT__ "MatZeroRows_MPIAIJ"
744 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
745 {
746   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
747   PetscErrorCode    ierr;
748   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
749   PetscInt          i,*owners = A->rmap->range;
750   PetscInt          *nprocs,j,idx,nsends,row;
751   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
752   PetscInt          *rvalues,count,base,slen,*source;
753   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
754   MPI_Comm          comm;
755   MPI_Request       *send_waits,*recv_waits;
756   MPI_Status        recv_status,*send_status;
757   const PetscScalar *xx;
758   PetscScalar       *bb;
759 #if defined(PETSC_DEBUG)
760   PetscBool found = PETSC_FALSE;
761 #endif
762 
763   PetscFunctionBegin;
764   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
765   /*  first count number of contributors to each processor */
766   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
767   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
768   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
769   j    = 0;
770   for (i=0; i<N; i++) {
771     if (lastidx > (idx = rows[i])) j = 0;
772     lastidx = idx;
773     for (; j<size; j++) {
774       if (idx >= owners[j] && idx < owners[j+1]) {
775         nprocs[2*j]++;
776         nprocs[2*j+1] = 1;
777         owner[i]      = j;
778 #if defined(PETSC_DEBUG)
779         found = PETSC_TRUE;
780 #endif
781         break;
782       }
783     }
784 #if defined(PETSC_DEBUG)
785     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
786     found = PETSC_FALSE;
787 #endif
788   }
789   nsends = 0;
790   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
791 
792   if (A->nooffproczerorows) {
793     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
794     nrecvs = nsends;
795     nmax   = N;
796   } else {
797     /* inform other processors of number of messages and max length*/
798     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
799   }
800 
801   /* post receives:   */
802   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
803   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
804   for (i=0; i<nrecvs; i++) {
805     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
806   }
807 
808   /* do sends:
809       1) starts[i] gives the starting index in svalues for stuff going to
810          the ith processor
811   */
812   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
813   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
814   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
815 
816   starts[0] = 0;
817   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
818   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
819 
820   starts[0] = 0;
821   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
822   count = 0;
823   for (i=0; i<size; i++) {
824     if (nprocs[2*i+1]) {
825       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
826     }
827   }
828   ierr = PetscFree(starts);CHKERRQ(ierr);
829 
830   base = owners[rank];
831 
832   /*  wait on receives */
833   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
834   count = nrecvs; slen = 0;
835   while (count) {
836     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
837     /* unpack receives into our local space */
838     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
839 
840     source[imdex] = recv_status.MPI_SOURCE;
841     lens[imdex]   = n;
842     slen         += n;
843     count--;
844   }
845   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
846 
847   /* move the data into the send scatter */
848   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
849   count = 0;
850   for (i=0; i<nrecvs; i++) {
851     values = rvalues + i*nmax;
852     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
853   }
854   ierr = PetscFree(rvalues);CHKERRQ(ierr);
855   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
856   ierr = PetscFree(owner);CHKERRQ(ierr);
857   ierr = PetscFree(nprocs);CHKERRQ(ierr);
858 
859   /* fix right hand side if needed */
860   if (x && b) {
861     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
862     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
863     for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
864     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
865     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
866   }
867   /*
868         Zero the required rows. If the "diagonal block" of the matrix
869      is square and the user wishes to set the diagonal we use separate
870      code so that MatSetValues() is not called for each diagonal entry, which would
871      allocate new memory each time, causing lots of mallocs and slowing things down.
872 
873   */
874   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
875   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
876   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
877     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
878   } else if (diag != 0.0) {
879     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
880     if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
881     for (i = 0; i < slen; i++) {
882       row  = lrows[i] + rstart;
883       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
884     }
885     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
886     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
887   } else {
888     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
889   }
890   ierr = PetscFree(lrows);CHKERRQ(ierr);
891 
892   /* wait on sends */
893   if (nsends) {
894     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
895     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
896     ierr = PetscFree(send_status);CHKERRQ(ierr);
897   }
898   ierr = PetscFree(send_waits);CHKERRQ(ierr);
899   ierr = PetscFree(svalues);CHKERRQ(ierr);
900   PetscFunctionReturn(0);
901 }
902 
903 #undef __FUNCT__
904 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
905 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
906 {
907   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
908   PetscErrorCode    ierr;
909   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
910   PetscInt          i,*owners = A->rmap->range;
911   PetscInt          *nprocs,j,idx,nsends;
912   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
913   PetscInt          *rvalues,count,base,slen,*source;
914   PetscInt          *lens,*lrows,*values,m;
915   MPI_Comm          comm;
916   MPI_Request       *send_waits,*recv_waits;
917   MPI_Status        recv_status,*send_status;
918   const PetscScalar *xx;
919   PetscScalar       *bb,*mask;
920   Vec               xmask,lmask;
921   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
922   const PetscInt    *aj, *ii,*ridx;
923   PetscScalar       *aa;
924 #if defined(PETSC_DEBUG)
925   PetscBool found = PETSC_FALSE;
926 #endif
927 
928   PetscFunctionBegin;
929   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
930   /*  first count number of contributors to each processor */
931   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
932   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
933   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
934   j    = 0;
935   for (i=0; i<N; i++) {
936     if (lastidx > (idx = rows[i])) j = 0;
937     lastidx = idx;
938     for (; j<size; j++) {
939       if (idx >= owners[j] && idx < owners[j+1]) {
940         nprocs[2*j]++;
941         nprocs[2*j+1] = 1;
942         owner[i]      = j;
943 #if defined(PETSC_DEBUG)
944         found = PETSC_TRUE;
945 #endif
946         break;
947       }
948     }
949 #if defined(PETSC_DEBUG)
950     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
951     found = PETSC_FALSE;
952 #endif
953   }
954   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
955 
956   /* inform other processors of number of messages and max length*/
957   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
958 
959   /* post receives:   */
960   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
961   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
962   for (i=0; i<nrecvs; i++) {
963     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
964   }
965 
966   /* do sends:
967       1) starts[i] gives the starting index in svalues for stuff going to
968          the ith processor
969   */
970   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
971   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
972   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
973 
974   starts[0] = 0;
975   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
976   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
977 
978   starts[0] = 0;
979   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
980   count = 0;
981   for (i=0; i<size; i++) {
982     if (nprocs[2*i+1]) {
983       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
984     }
985   }
986   ierr = PetscFree(starts);CHKERRQ(ierr);
987 
988   base = owners[rank];
989 
990   /*  wait on receives */
991   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
992   count = nrecvs; slen = 0;
993   while (count) {
994     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
995     /* unpack receives into our local space */
996     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
997 
998     source[imdex] = recv_status.MPI_SOURCE;
999     lens[imdex]   = n;
1000     slen         += n;
1001     count--;
1002   }
1003   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1004 
1005   /* move the data into the send scatter */
1006   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1007   count = 0;
1008   for (i=0; i<nrecvs; i++) {
1009     values = rvalues + i*nmax;
1010     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
1011   }
1012   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1013   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1014   ierr = PetscFree(owner);CHKERRQ(ierr);
1015   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1016   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1017 
1018   /* zero diagonal part of matrix */
1019   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1020 
1021   /* handle off diagonal part of matrix */
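  /* a mask vector with 1 in every locally zeroed row is scattered, using the same VecScatter as MatMult(),
     into the off-process column layout so that each process learns which of its off-diagonal columns
     correspond to rows zeroed on other processes */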
1022   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1023   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1024   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1025   for (i=0; i<slen; i++) bb[lrows[i]] = 1;
1026   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1027   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1028   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1030   if (x) {
1031     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1032     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1034     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1035   }
1036   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1037 
1038   /* remove zeroed rows of off diagonal matrix */
1039   ii = aij->i;
1040   for (i=0; i<slen; i++) {
1041     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1042   }
1043 
1044   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1045   if (aij->compressedrow.use) {
1046     m    = aij->compressedrow.nrows;
1047     ii   = aij->compressedrow.i;
1048     ridx = aij->compressedrow.rindex;
1049     for (i=0; i<m; i++) {
1050       n  = ii[i+1] - ii[i];
1051       aj = aij->j + ii[i];
1052       aa = aij->a + ii[i];
1053 
1054       for (j=0; j<n; j++) {
1055         if (PetscAbsScalar(mask[*aj])) {
1056           if (b) bb[*ridx] -= *aa*xx[*aj];
1057           *aa = 0.0;
1058         }
1059         aa++;
1060         aj++;
1061       }
1062       ridx++;
1063     }
1064   } else { /* do not use compressed row format */
1065     m = l->B->rmap->n;
1066     for (i=0; i<m; i++) {
1067       n  = ii[i+1] - ii[i];
1068       aj = aij->j + ii[i];
1069       aa = aij->a + ii[i];
1070       for (j=0; j<n; j++) {
1071         if (PetscAbsScalar(mask[*aj])) {
1072           if (b) bb[i] -= *aa*xx[*aj];
1073           *aa = 0.0;
1074         }
1075         aa++;
1076         aj++;
1077       }
1078     }
1079   }
1080   if (x) {
1081     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1082     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1083   }
1084   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1085   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1086   ierr = PetscFree(lrows);CHKERRQ(ierr);
1087 
1088   /* wait on sends */
1089   if (nsends) {
1090     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1091     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1092     ierr = PetscFree(send_status);CHKERRQ(ierr);
1093   }
1094   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1095   ierr = PetscFree(svalues);CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
1098 
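/*
   y = A*x: the scatter of the needed off-process entries of x into a->lvec is started, the diagonal block
   product a->A*x is computed while that communication proceeds, and once the scatter completes the
   off-diagonal contribution a->B*lvec is added in.
*/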
1099 #undef __FUNCT__
1100 #define __FUNCT__ "MatMult_MPIAIJ"
1101 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1102 {
1103   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1104   PetscErrorCode ierr;
1105   PetscInt       nt;
1106 
1107   PetscFunctionBegin;
1108   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1109   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1110   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1111   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1112   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1113   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1114   PetscFunctionReturn(0);
1115 }
1116 
1117 #undef __FUNCT__
1118 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1119 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1120 {
1121   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1122   PetscErrorCode ierr;
1123 
1124   PetscFunctionBegin;
1125   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 #undef __FUNCT__
1130 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1131 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1132 {
1133   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1134   PetscErrorCode ierr;
1135 
1136   PetscFunctionBegin;
1137   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1138   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1139   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1140   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1141   PetscFunctionReturn(0);
1142 }
1143 
1144 #undef __FUNCT__
1145 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1146 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1147 {
1148   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1149   PetscErrorCode ierr;
1150   PetscBool      merged;
1151 
1152   PetscFunctionBegin;
1153   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1154   /* do nondiagonal part */
1155   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1156   if (!merged) {
1157     /* send it on its way */
1158     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1159     /* do local part */
1160     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1161     /* receive remote parts: note this assumes the values are not actually */
1162     /* added into yy until the VecScatterEnd() below */
1163     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1164   } else {
1165     /* do local part */
1166     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1167     /* send it on its way */
1168     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1169     /* values actually were received in the Begin() but we need to call this nop */
1170     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1171   }
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 EXTERN_C_BEGIN
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1178 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1179 {
1180   MPI_Comm       comm;
1181   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1182   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1183   IS             Me,Notme;
1184   PetscErrorCode ierr;
1185   PetscInt       M,N,first,last,*notme,i;
1186   PetscMPIInt    size;
1187 
1188   PetscFunctionBegin;
1189   /* Easy test: the diagonal blocks must be transposes of each other */
1190   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1191   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1192   if (!*f) PetscFunctionReturn(0);
1193   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1194   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1195   if (size == 1) PetscFunctionReturn(0);
1196 
1197   /* Hard test: the off-diagonal block. This takes a MatGetSubMatrices(). */
1198   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1199   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1200   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1201   for (i=0; i<first; i++) notme[i] = i;
1202   for (i=last; i<M; i++) notme[i-last+first] = i;
1203   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1204   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1205   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1206   Aoff = Aoffs[0];
1207   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1208   Boff = Boffs[0];
1209   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1210   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1211   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1212   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1213   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1214   ierr = PetscFree(notme);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 EXTERN_C_END
1218 
1219 #undef __FUNCT__
1220 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1221 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1222 {
1223   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1224   PetscErrorCode ierr;
1225 
1226   PetscFunctionBegin;
1227   /* do nondiagonal part */
1228   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1229   /* send it on its way */
1230   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1231   /* do local part */
1232   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1233   /* receive remote parts */
1234   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 /*
1239   This only works correctly for square matrices where the subblock A->A is the
1240    diagonal block
1241 */
1242 #undef __FUNCT__
1243 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1244 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1245 {
1246   PetscErrorCode ierr;
1247   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1248 
1249   PetscFunctionBegin;
1250   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1251   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1252   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 #undef __FUNCT__
1257 #define __FUNCT__ "MatScale_MPIAIJ"
1258 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1259 {
1260   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1261   PetscErrorCode ierr;
1262 
1263   PetscFunctionBegin;
1264   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1265   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 #undef __FUNCT__
1270 #define __FUNCT__ "MatDestroy_MPIAIJ"
1271 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1272 {
1273   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1274   PetscErrorCode ierr;
1275 
1276   PetscFunctionBegin;
1277 #if defined(PETSC_USE_LOG)
1278   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1279 #endif
1280   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1281   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1282   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1283   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1284 #if defined(PETSC_USE_CTABLE)
1285   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1286 #else
1287   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1288 #endif
1289   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1290   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1291   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1292   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1293   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1294   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1295 
1296   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",NULL);CHKERRQ(ierr);
1305   PetscFunctionReturn(0);
1306 }
1307 
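/*
   Writes the matrix in PETSc binary format: rank 0 writes the header and its own data, then receives the
   row lengths, column indices, and values of each other rank in turn, throttled by the
   PetscViewerFlowControl routines, and appends them to the file.
*/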
1308 #undef __FUNCT__
1309 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1310 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1311 {
1312   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1313   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1314   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1315   PetscErrorCode ierr;
1316   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1317   int            fd;
1318   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1319   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1320   PetscScalar    *column_values;
1321   PetscInt       message_count,flowcontrolcount;
1322   FILE           *file;
1323 
1324   PetscFunctionBegin;
1325   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1326   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1327   nz   = A->nz + B->nz;
1328   if (!rank) {
1329     header[0] = MAT_FILE_CLASSID;
1330     header[1] = mat->rmap->N;
1331     header[2] = mat->cmap->N;
1332 
1333     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1334     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1335     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1336     /* get largest number of rows any processor has */
1337     rlen  = mat->rmap->n;
1338     range = mat->rmap->range;
1339     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1340   } else {
1341     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1342     rlen = mat->rmap->n;
1343   }
1344 
1345   /* load up the local row counts */
1346   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1347   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1348 
1349   /* store the row lengths to the file */
1350   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1351   if (!rank) {
1352     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1353     for (i=1; i<size; i++) {
1354       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1355       rlen = range[i+1] - range[i];
1356       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1357       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1358     }
1359     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1360   } else {
1361     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1362     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1363     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1364   }
1365   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1366 
1367   /* load up the local column indices */
1368   nzmax = nz; /* each processor needs as much space as the largest processor needs */
1369   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1370   ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1371   cnt   = 0;
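  /*
     Build the global column indices of each local row in increasing order: the B
     (off-diagonal) block stores compressed column numbers, which garray[] maps back to
     global columns.  Columns of B lying to the left of the diagonal block are emitted
     first, then the diagonal block A (shifted by cstart), then the remaining columns of B.
  */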
1372   for (i=0; i<mat->rmap->n; i++) {
1373     for (j=B->i[i]; j<B->i[i+1]; j++) {
1374       if ((col = garray[B->j[j]]) > cstart) break;
1375       column_indices[cnt++] = col;
1376     }
1377     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1378     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1379   }
1380   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1381 
1382   /* store the column indices to the file */
1383   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1384   if (!rank) {
1385     MPI_Status status;
1386     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1387     for (i=1; i<size; i++) {
1388       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1389       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1390       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1391       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1392       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1393     }
1394     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1395   } else {
1396     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1397     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1398     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1399     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1400   }
1401   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1402 
1403   /* load up the local column values */
1404   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1405   cnt  = 0;
1406   for (i=0; i<mat->rmap->n; i++) {
1407     for (j=B->i[i]; j<B->i[i+1]; j++) {
1408       if (garray[B->j[j]] > cstart) break;
1409       column_values[cnt++] = B->a[j];
1410     }
1411     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1412     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1413   }
1414   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1415 
1416   /* store the column values to the file */
1417   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1418   if (!rank) {
1419     MPI_Status status;
1420     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1421     for (i=1; i<size; i++) {
1422       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1423       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1424       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1425       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1426       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1427     }
1428     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1429   } else {
1430     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1431     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1432     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1433     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1434   }
1435   ierr = PetscFree(column_values);CHKERRQ(ierr);
1436 
1437   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1438   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1439   PetscFunctionReturn(0);
1440 }
1441 
1442 #include <petscdraw.h>
1443 #undef __FUNCT__
1444 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1445 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1446 {
1447   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1448   PetscErrorCode    ierr;
1449   PetscMPIInt       rank = aij->rank,size = aij->size;
1450   PetscBool         isdraw,iascii,isbinary;
1451   PetscViewer       sviewer;
1452   PetscViewerFormat format;
1453 
1454   PetscFunctionBegin;
1455   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1456   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1457   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1458   if (iascii) {
1459     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1460     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1461       MatInfo   info;
1462       PetscBool inodes;
1463 
1464       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1465       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1466       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1467       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1468       if (!inodes) {
1469         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1470                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1471       } else {
1472         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1473                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1474       }
1475       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1476       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1477       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1478       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1479       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1480       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1481       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1482       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1483       PetscFunctionReturn(0);
1484     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1485       PetscInt inodecount,inodelimit,*inodes;
1486       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1487       if (inodes) {
1488         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1489       } else {
1490         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1491       }
1492       PetscFunctionReturn(0);
1493     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1494       PetscFunctionReturn(0);
1495     }
1496   } else if (isbinary) {
1497     if (size == 1) {
1498       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1499       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1500     } else {
1501       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1502     }
1503     PetscFunctionReturn(0);
1504   } else if (isdraw) {
1505     PetscDraw draw;
1506     PetscBool isnull;
1507     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1508     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1509   }
1510 
1511   if (size == 1) {
1512     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1513     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1514   } else {
1515     /* assemble the entire matrix onto first processor. */
1516     Mat        A;
1517     Mat_SeqAIJ *Aloc;
1518     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1519     MatScalar  *a;
1520 
1521     if (mat->rmap->N > 1024) {
1522       PetscBool flg = PETSC_FALSE;
1523 
1524       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1525       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1526     }
1527 
1528     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1529     if (!rank) {
1530       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1531     } else {
1532       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1533     }
1534     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1535     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1536     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1537     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1538     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1539 
1540     /* copy over the A part */
1541     Aloc = (Mat_SeqAIJ*)aij->A->data;
1542     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1543     row  = mat->rmap->rstart;
1544     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1545     for (i=0; i<m; i++) {
1546       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1547       row++;
1548       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1549     }
1550     aj = Aloc->j;
1551     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1552 
1553     /* copy over the B part */
1554     Aloc = (Mat_SeqAIJ*)aij->B->data;
1555     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1556     row  = mat->rmap->rstart;
1557     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1558     ct   = cols;
1559     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1560     for (i=0; i<m; i++) {
1561       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1562       row++;
1563       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1564     }
1565     ierr = PetscFree(ct);CHKERRQ(ierr);
1566     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1567     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1568     /*
1569        Everyone has to participate in this call to draw the matrix since the graphics waits are
1570        synchronized across all processes that share the PetscDraw object
1571     */
1572     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1573     if (!rank) {
1574       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1575       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1576       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1577       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1578     }
1579     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1580     ierr = MatDestroy(&A);CHKERRQ(ierr);
1581   }
1582   PetscFunctionReturn(0);
1583 }
1584 
1585 #undef __FUNCT__
1586 #define __FUNCT__ "MatView_MPIAIJ"
1587 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1588 {
1589   PetscErrorCode ierr;
1590   PetscBool      iascii,isdraw,issocket,isbinary;
1591 
1592   PetscFunctionBegin;
1593   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1594   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1595   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1596   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1597   if (iascii || isdraw || isbinary || issocket) {
1598     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1599   }
1600   PetscFunctionReturn(0);
1601 }
1602 
1603 #undef __FUNCT__
1604 #define __FUNCT__ "MatSOR_MPIAIJ"
1605 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1606 {
1607   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1608   PetscErrorCode ierr;
1609   Vec            bb1 = 0;
1610   PetscBool      hasop;
1611 
1612   PetscFunctionBegin;
1613   if (flag == SOR_APPLY_UPPER) {
1614     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1615     PetscFunctionReturn(0);
1616   }
1617 
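  /*
     bb1 holds the locally modified right-hand side bb - B*x_ghost; it is needed only when
     more than one iteration is requested, when the initial guess is nonzero
     (note that ~flag & SOR_ZERO_INITIAL_GUESS tests that the bit is NOT set),
     or for the Eisenstat variant.
  */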
1618   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1619     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1620   }
1621 
1622   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1623     if (flag & SOR_ZERO_INITIAL_GUESS) {
1624       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1625       its--;
1626     }
1627 
1628     while (its--) {
1629       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1630       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1631 
1632       /* update rhs: bb1 = bb - B*x */
1633       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1634       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1635 
1636       /* local sweep */
1637       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1638     }
1639   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1640     if (flag & SOR_ZERO_INITIAL_GUESS) {
1641       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1642       its--;
1643     }
1644     while (its--) {
1645       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1646       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1647 
1648       /* update rhs: bb1 = bb - B*x */
1649       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1650       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1651 
1652       /* local sweep */
1653       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1654     }
1655   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1656     if (flag & SOR_ZERO_INITIAL_GUESS) {
1657       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1658       its--;
1659     }
1660     while (its--) {
1661       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1662       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1663 
1664       /* update rhs: bb1 = bb - B*x */
1665       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1666       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1667 
1668       /* local sweep */
1669       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1670     }
1671   } else if (flag & SOR_EISENSTAT) {
1672     Vec xx1;
1673 
1674     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1675     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1676 
1677     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1678     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1679     if (!mat->diag) {
1680       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1681       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1682     }
1683     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1684     if (hasop) {
1685       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1686     } else {
1687       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1688     }
1689     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1690 
1691     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1692 
1693     /* local sweep */
1694     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1695     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1696     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1697   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1698 
1699   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1700   PetscFunctionReturn(0);
1701 }
1702 
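/*
   MatPermute_MPIAIJ permutes the rows and columns of A without gathering the matrix on one
   process: PetscSF objects invert the row and column permutations so each process learns
   where its rows, columns, and ghost columns land, the diagonal/off-diagonal preallocation
   of the permuted matrix is computed, and the entries are then inserted with MatSetValues().

   A minimal usage sketch (assuming index sets rowp and colp describing the permutation
   already exist on the communicator of A):

     Mat B;
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);   /- dispatches here for MATMPIAIJ -/
     ierr = MatDestroy(&B);CHKERRQ(ierr);
*/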
1703 #undef __FUNCT__
1704 #define __FUNCT__ "MatPermute_MPIAIJ"
1705 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1706 {
1707   Mat            aA,aB,Aperm;
1708   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1709   PetscScalar    *aa,*ba;
1710   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1711   PetscSF        rowsf,sf;
1712   IS             parcolp = NULL;
1713   PetscBool      done;
1714   PetscErrorCode ierr;
1715 
1716   PetscFunctionBegin;
1717   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1718   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1719   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1720   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1721 
1722   /* Invert row permutation to find out where my rows should go */
1723   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1724   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1725   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1726   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1727   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1728   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1729 
1730   /* Invert column permutation to find out where my columns should go */
1731   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1732   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1733   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1734   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1735   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1736   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1737   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1738 
1739   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1740   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1741   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1742 
1743   /* Find out where my gcols should go */
1744   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1745   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1746   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1747   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1748   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1749   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1750   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1751   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1752 
1753   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1754   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1755   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1756   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1757   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1758   for (i=0; i<m; i++) {
1759     PetscInt row = rdest[i],rowner;
1760     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1761     for (j=ai[i]; j<ai[i+1]; j++) {
1762       PetscInt cowner,col = cdest[aj[j]];
1763       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1764       if (rowner == cowner) dnnz[i]++;
1765       else onnz[i]++;
1766     }
1767     for (j=bi[i]; j<bi[i+1]; j++) {
1768       PetscInt cowner,col = gcdest[bj[j]];
1769       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1770       if (rowner == cowner) dnnz[i]++;
1771       else onnz[i]++;
1772     }
1773   }
1774   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1775   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1776   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1777   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1778   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1779   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1780   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1781 
1782   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1783   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1784   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1785   for (i=0; i<m; i++) {
1786     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1787     PetscInt rowlen;
1788     rowlen = ai[i+1] - ai[i];
1789     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1790     ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1791     rowlen = bi[i+1] - bi[i];
1792     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1793     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1794   }
1795   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1796   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1797   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1798   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1799   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1800   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1801   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1802   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1803   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1804   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1805   *B = Aperm;
1806   PetscFunctionReturn(0);
1807 }
1808 
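/*
   MatGetInfo_MPIAIJ accumulates the statistics of the local diagonal (A) and off-diagonal (B)
   blocks; for MAT_GLOBAL_MAX / MAT_GLOBAL_SUM the five floating point counters are combined
   across the communicator with a single MPI_Allreduce().

   A minimal usage sketch with the standard MatGetInfo() interface:

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g\n",info.nz_used);CHKERRQ(ierr);
*/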
1809 #undef __FUNCT__
1810 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1811 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1812 {
1813   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1814   Mat            A    = mat->A,B = mat->B;
1815   PetscErrorCode ierr;
1816   PetscReal      isend[5],irecv[5];
1817 
1818   PetscFunctionBegin;
1819   info->block_size = 1.0;
1820   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1821 
1822   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1823   isend[3] = info->memory;  isend[4] = info->mallocs;
1824 
1825   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1826 
1827   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1828   isend[3] += info->memory;  isend[4] += info->mallocs;
1829   if (flag == MAT_LOCAL) {
1830     info->nz_used      = isend[0];
1831     info->nz_allocated = isend[1];
1832     info->nz_unneeded  = isend[2];
1833     info->memory       = isend[3];
1834     info->mallocs      = isend[4];
1835   } else if (flag == MAT_GLOBAL_MAX) {
1836     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1837 
1838     info->nz_used      = irecv[0];
1839     info->nz_allocated = irecv[1];
1840     info->nz_unneeded  = irecv[2];
1841     info->memory       = irecv[3];
1842     info->mallocs      = irecv[4];
1843   } else if (flag == MAT_GLOBAL_SUM) {
1844     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1845 
1846     info->nz_used      = irecv[0];
1847     info->nz_allocated = irecv[1];
1848     info->nz_unneeded  = irecv[2];
1849     info->memory       = irecv[3];
1850     info->mallocs      = irecv[4];
1851   }
1852   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1853   info->fill_ratio_needed = 0;
1854   info->factor_mallocs    = 0;
1855   PetscFunctionReturn(0);
1856 }
1857 
1858 #undef __FUNCT__
1859 #define __FUNCT__ "MatSetOption_MPIAIJ"
1860 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1861 {
1862   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1863   PetscErrorCode ierr;
1864 
1865   PetscFunctionBegin;
1866   switch (op) {
1867   case MAT_NEW_NONZERO_LOCATIONS:
1868   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1869   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1870   case MAT_KEEP_NONZERO_PATTERN:
1871   case MAT_NEW_NONZERO_LOCATION_ERR:
1872   case MAT_USE_INODES:
1873   case MAT_IGNORE_ZERO_ENTRIES:
1874     MatCheckPreallocated(A,1);
1875     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1876     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1877     break;
1878   case MAT_ROW_ORIENTED:
1879     a->roworiented = flg;
1880 
1881     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1882     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1883     break;
1884   case MAT_NEW_DIAGONALS:
1885     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1886     break;
1887   case MAT_IGNORE_OFF_PROC_ENTRIES:
1888     a->donotstash = flg;
1889     break;
1890   case MAT_SPD:
1891     A->spd_set = PETSC_TRUE;
1892     A->spd     = flg;
1893     if (flg) {
1894       A->symmetric                  = PETSC_TRUE;
1895       A->structurally_symmetric     = PETSC_TRUE;
1896       A->symmetric_set              = PETSC_TRUE;
1897       A->structurally_symmetric_set = PETSC_TRUE;
1898     }
1899     break;
1900   case MAT_SYMMETRIC:
1901     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1902     break;
1903   case MAT_STRUCTURALLY_SYMMETRIC:
1904     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1905     break;
1906   case MAT_HERMITIAN:
1907     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1908     break;
1909   case MAT_SYMMETRY_ETERNAL:
1910     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1911     break;
1912   default:
1913     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1914   }
1915   PetscFunctionReturn(0);
1916 }
1917 
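/*
   MatGetRow_MPIAIJ returns one locally owned row by merging the diagonal (A) and
   off-diagonal (B) parts; garray[] translates the compressed column numbers of B into
   global columns and the merge keeps the columns in increasing global order.  Only one row
   may be active at a time, hence the getrowactive flag and the matching
   MatRestoreRow_MPIAIJ() below.

   A minimal usage sketch with the standard MatGetRow()/MatRestoreRow() calls (the row must
   be owned by the calling process):

     PetscInt          ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
*/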
1918 #undef __FUNCT__
1919 #define __FUNCT__ "MatGetRow_MPIAIJ"
1920 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1921 {
1922   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1923   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1924   PetscErrorCode ierr;
1925   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1926   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1927   PetscInt       *cmap,*idx_p;
1928 
1929   PetscFunctionBegin;
1930   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1931   mat->getrowactive = PETSC_TRUE;
1932 
1933   if (!mat->rowvalues && (idx || v)) {
1934     /*
1935         allocate enough space to hold information from the longest row.
1936     */
1937     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1938     PetscInt   max = 1,tmp;
1939     for (i=0; i<matin->rmap->n; i++) {
1940       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1941       if (max < tmp) max = tmp;
1942     }
1943     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1944   }
1945 
1946   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1947   lrow = row - rstart;
1948 
1949   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1950   if (!v)   {pvA = 0; pvB = 0;}
1951   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1952   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1953   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1954   nztot = nzA + nzB;
1955 
1956   cmap = mat->garray;
1957   if (v  || idx) {
1958     if (nztot) {
1959       /* Sort by increasing column numbers, assuming A and B already sorted */
1960       PetscInt imark = -1;
1961       if (v) {
1962         *v = v_p = mat->rowvalues;
1963         for (i=0; i<nzB; i++) {
1964           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1965           else break;
1966         }
1967         imark = i;
1968         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1969         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1970       }
1971       if (idx) {
1972         *idx = idx_p = mat->rowindices;
1973         if (imark > -1) {
1974           for (i=0; i<imark; i++) {
1975             idx_p[i] = cmap[cworkB[i]];
1976           }
1977         } else {
1978           for (i=0; i<nzB; i++) {
1979             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1980             else break;
1981           }
1982           imark = i;
1983         }
1984         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1985         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1986       }
1987     } else {
1988       if (idx) *idx = 0;
1989       if (v)   *v   = 0;
1990     }
1991   }
1992   *nz  = nztot;
1993   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1994   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 #undef __FUNCT__
1999 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
2000 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2001 {
2002   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2003 
2004   PetscFunctionBegin;
2005   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2006   aij->getrowactive = PETSC_FALSE;
2007   PetscFunctionReturn(0);
2008 }
2009 
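/*
   MatNorm_MPIAIJ computes the requested norm from the local A and B blocks and then
   combines the per-process contributions:
     NORM_FROBENIUS - local sums of |a_ij|^2 are added with MPI_Allreduce(SUM), then sqrt
     NORM_1         - per-column absolute sums are accumulated into a global-length array,
                      summed across processes, and the largest entry is taken
     NORM_INFINITY  - each process forms the maximum absolute row sum of its rows and the
                      results are combined with MPI_Allreduce(MAX)
   The two norm is not supported for this format.
*/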
2010 #undef __FUNCT__
2011 #define __FUNCT__ "MatNorm_MPIAIJ"
2012 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2013 {
2014   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
2015   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2016   PetscErrorCode ierr;
2017   PetscInt       i,j,cstart = mat->cmap->rstart;
2018   PetscReal      sum = 0.0;
2019   MatScalar      *v;
2020 
2021   PetscFunctionBegin;
2022   if (aij->size == 1) {
2023     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2024   } else {
2025     if (type == NORM_FROBENIUS) {
2026       v = amat->a;
2027       for (i=0; i<amat->nz; i++) {
2028         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2029       }
2030       v = bmat->a;
2031       for (i=0; i<bmat->nz; i++) {
2032         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2033       }
2034       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2035       *norm = PetscSqrtReal(*norm);
2036     } else if (type == NORM_1) { /* max column norm */
2037       PetscReal *tmp,*tmp2;
2038       PetscInt  *jj,*garray = aij->garray;
2039       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2040       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2041       ierr  = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2042       *norm = 0.0;
2043       v     = amat->a; jj = amat->j;
2044       for (j=0; j<amat->nz; j++) {
2045         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2046       }
2047       v = bmat->a; jj = bmat->j;
2048       for (j=0; j<bmat->nz; j++) {
2049         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2050       }
2051       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2052       for (j=0; j<mat->cmap->N; j++) {
2053         if (tmp2[j] > *norm) *norm = tmp2[j];
2054       }
2055       ierr = PetscFree(tmp);CHKERRQ(ierr);
2056       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2057     } else if (type == NORM_INFINITY) { /* max row norm */
2058       PetscReal ntemp = 0.0;
2059       for (j=0; j<aij->A->rmap->n; j++) {
2060         v   = amat->a + amat->i[j];
2061         sum = 0.0;
2062         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2063           sum += PetscAbsScalar(*v); v++;
2064         }
2065         v = bmat->a + bmat->i[j];
2066         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2067           sum += PetscAbsScalar(*v); v++;
2068         }
2069         if (sum > ntemp) ntemp = sum;
2070       }
2071       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2072     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2073   }
2074   PetscFunctionReturn(0);
2075 }
2076 
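/*
   MatTranspose_MPIAIJ first counts, when a new matrix is needed, how many entries each
   column of the local blocks contributes to the rows of the transpose (a PetscSF reduction
   accumulates the off-diagonal counts on the owning processes), preallocates B accordingly,
   and then inserts each local row of A and B as a column of B with MatSetValues(); the
   column indices of the A block are temporarily shifted to global numbering and shifted
   back afterwards.
*/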
2077 #undef __FUNCT__
2078 #define __FUNCT__ "MatTranspose_MPIAIJ"
2079 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2080 {
2081   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2082   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2083   PetscErrorCode ierr;
2084   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2085   PetscInt       cstart = A->cmap->rstart,ncol;
2086   Mat            B;
2087   MatScalar      *array;
2088 
2089   PetscFunctionBegin;
2090   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2091 
2092   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2093   ai = Aloc->i; aj = Aloc->j;
2094   bi = Bloc->i; bj = Bloc->j;
2095   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2096     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2097     PetscSFNode          *oloc;
2098     PETSC_UNUSED PetscSF sf;
2099 
2100     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2101     /* compute d_nnz for preallocation */
2102     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2103     for (i=0; i<ai[ma]; i++) {
2104       d_nnz[aj[i]]++;
2105       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2106     }
2107     /* compute local off-diagonal contributions */
2108     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2109     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2110     /* map those to global */
2111     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2112     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2113     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2114     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2115     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2116     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2117     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2118 
2119     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2120     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2121     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2122     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2123     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2124     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2125   } else {
2126     B    = *matout;
2127     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2128     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2129   }
2130 
2131   /* copy over the A part */
2132   array = Aloc->a;
2133   row   = A->rmap->rstart;
2134   for (i=0; i<ma; i++) {
2135     ncol = ai[i+1]-ai[i];
2136     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2137     row++;
2138     array += ncol; aj += ncol;
2139   }
2140   aj = Aloc->j;
2141   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
2142 
2143   /* copy over the B part */
2144   ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2145   ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2146   array = Bloc->a;
2147   row   = A->rmap->rstart;
2148   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2149   cols_tmp = cols;
2150   for (i=0; i<mb; i++) {
2151     ncol = bi[i+1]-bi[i];
2152     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2153     row++;
2154     array += ncol; cols_tmp += ncol;
2155   }
2156   ierr = PetscFree(cols);CHKERRQ(ierr);
2157 
2158   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2159   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2160   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2161     *matout = B;
2162   } else {
2163     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2164   }
2165   PetscFunctionReturn(0);
2166 }
2167 
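/*
   MatDiagonalScale_MPIAIJ computes diag(ll) * mat * diag(rr).  The scatter of rr into the
   ghosted vector lvec is started first so that communication overlaps with scaling the
   diagonal block; the off-diagonal block is right-scaled only after the scatter completes.
*/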
2168 #undef __FUNCT__
2169 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2170 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2171 {
2172   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2173   Mat            a    = aij->A,b = aij->B;
2174   PetscErrorCode ierr;
2175   PetscInt       s1,s2,s3;
2176 
2177   PetscFunctionBegin;
2178   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2179   if (rr) {
2180     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2181     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2182     /* Overlap communication with computation. */
2183     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2184   }
2185   if (ll) {
2186     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2187     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2188     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2189   }
2190   /* scale  the diagonal block */
2191   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2192 
2193   if (rr) {
2194     /* Do a scatter end and then right scale the off-diagonal block */
2195     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2196     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2197   }
2198   PetscFunctionReturn(0);
2199 }
2200 
2201 #undef __FUNCT__
2202 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2203 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2204 {
2205   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2206   PetscErrorCode ierr;
2207 
2208   PetscFunctionBegin;
2209   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2210   PetscFunctionReturn(0);
2211 }
2212 
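/*
   MatEqual_MPIAIJ compares the local diagonal and off-diagonal blocks and combines the
   per-process results with a logical-and reduction, so every process returns the same answer.
*/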
2213 #undef __FUNCT__
2214 #define __FUNCT__ "MatEqual_MPIAIJ"
2215 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2216 {
2217   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2218   Mat            a,b,c,d;
2219   PetscBool      flg;
2220   PetscErrorCode ierr;
2221 
2222   PetscFunctionBegin;
2223   a = matA->A; b = matA->B;
2224   c = matB->A; d = matB->B;
2225 
2226   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2227   if (flg) {
2228     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2229   }
2230   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2231   PetscFunctionReturn(0);
2232 }
2233 
2234 #undef __FUNCT__
2235 #define __FUNCT__ "MatCopy_MPIAIJ"
2236 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2237 {
2238   PetscErrorCode ierr;
2239   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2240   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2241 
2242   PetscFunctionBegin;
2243   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2244   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2245     /* because of the column compression in the off-processor part of the matrix a->B,
2246        the number of columns in a->B and b->B may be different, hence we cannot call
2247        the MatCopy() directly on the two parts. If need be, we can provide a more
2248        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2249        then copying the submatrices */
2250     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2251   } else {
2252     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2253     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2254   }
2255   PetscFunctionReturn(0);
2256 }
2257 
2258 #undef __FUNCT__
2259 #define __FUNCT__ "MatSetUp_MPIAIJ"
2260 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2261 {
2262   PetscErrorCode ierr;
2263 
2264   PetscFunctionBegin;
2265   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2266   PetscFunctionReturn(0);
2267 }
2268 
2269 #undef __FUNCT__
2270 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2271 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
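/*
   For every row the routine counts the size of the union of the (global) column sets of X
   and Y with a two-pointer merge.  For example, if a row of Y has global columns {1,5,9}
   and the same row of X has {3,5,7}, the union {1,3,5,7,9} gives nnz = 5 for that row of
   the result.
*/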
2272 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2273 {
2274   PetscInt       i,m=Y->rmap->N;
2275   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2276   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2277   const PetscInt *xi = x->i,*yi = y->i;
2278 
2279   PetscFunctionBegin;
2280   /* Set the number of nonzeros in the new matrix */
2281   for (i=0; i<m; i++) {
2282     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2283     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2284     nnz[i] = 0;
2285     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2286       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2287       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2288       nnz[i]++;
2289     }
2290     for (; k<nzy; k++) nnz[i]++;
2291   }
2292   PetscFunctionReturn(0);
2293 }
2294 
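/*
   MatAXPY_MPIAIJ (Y = a*X + Y) distinguishes three cases:
     SAME_NONZERO_PATTERN   - the value arrays of the A and B blocks line up entry for
                              entry, so one BLAS axpy per block suffices
     SUBSET_NONZERO_PATTERN - the entries of X are scattered into Y through the cached
                              xtoy index map, built on first use
     otherwise              - a new matrix with the merged nonzero pattern is preallocated,
                              filled by MatAXPY_BasicWithPreallocation(), and then replaces Y
*/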
2295 #undef __FUNCT__
2296 #define __FUNCT__ "MatAXPY_MPIAIJ"
2297 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2298 {
2299   PetscErrorCode ierr;
2300   PetscInt       i;
2301   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2302   PetscBLASInt   bnz,one=1;
2303   Mat_SeqAIJ     *x,*y;
2304 
2305   PetscFunctionBegin;
2306   if (str == SAME_NONZERO_PATTERN) {
2307     PetscScalar alpha = a;
2308     x    = (Mat_SeqAIJ*)xx->A->data;
2309     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2310     y    = (Mat_SeqAIJ*)yy->A->data;
2311     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2312     x    = (Mat_SeqAIJ*)xx->B->data;
2313     y    = (Mat_SeqAIJ*)yy->B->data;
2314     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2315     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2316   } else if (str == SUBSET_NONZERO_PATTERN) {
2317     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2318 
2319     x = (Mat_SeqAIJ*)xx->B->data;
2320     y = (Mat_SeqAIJ*)yy->B->data;
2321     if (y->xtoy && y->XtoY != xx->B) {
2322       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2323       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2324     }
2325     if (!y->xtoy) { /* get xtoy */
2326       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2327       y->XtoY = xx->B;
2328       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2329     }
2330     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2331   } else {
2332     Mat      B;
2333     PetscInt *nnz_d,*nnz_o;
2334     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2335     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2336     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2337     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2338     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2339     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2340     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2341     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2342     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2343     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2344     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2345     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2346     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2347     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2348   }
2349   PetscFunctionReturn(0);
2350 }
2351 
2352 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2353 
2354 #undef __FUNCT__
2355 #define __FUNCT__ "MatConjugate_MPIAIJ"
2356 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2357 {
2358 #if defined(PETSC_USE_COMPLEX)
2359   PetscErrorCode ierr;
2360   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2361 
2362   PetscFunctionBegin;
2363   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2364   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2365 #else
2366   PetscFunctionBegin;
2367 #endif
2368   PetscFunctionReturn(0);
2369 }
2370 
2371 #undef __FUNCT__
2372 #define __FUNCT__ "MatRealPart_MPIAIJ"
2373 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2374 {
2375   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2376   PetscErrorCode ierr;
2377 
2378   PetscFunctionBegin;
2379   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2380   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2381   PetscFunctionReturn(0);
2382 }
2383 
2384 #undef __FUNCT__
2385 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2386 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2387 {
2388   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2389   PetscErrorCode ierr;
2390 
2391   PetscFunctionBegin;
2392   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2393   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2394   PetscFunctionReturn(0);
2395 }
2396 
2397 #if defined(PETSC_HAVE_PBGL)
2398 
2399 #include <boost/parallel/mpi/bsp_process_group.hpp>
2400 #include <boost/graph/distributed/ilu_default_graph.hpp>
2401 #include <boost/graph/distributed/ilu_0_block.hpp>
2402 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2403 #include <boost/graph/distributed/petsc/interface.hpp>
2404 #include <boost/multi_array.hpp>
2405 #include <boost/parallel/distributed_property_map.hpp>
2406 
2407 #undef __FUNCT__
2408 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2409 /*
2410   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2411 */
2412 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2413 {
2414   namespace petsc = boost::distributed::petsc;
2415 
2416   namespace graph_dist = boost::graph::distributed;
2417   using boost::graph::distributed::ilu_default::process_group_type;
2418   using boost::graph::ilu_permuted;
2419 
2420   PetscBool      row_identity, col_identity;
2421   PetscContainer c;
2422   PetscInt       m, n, M, N;
2423   PetscErrorCode ierr;
2424 
2425   PetscFunctionBegin;
2426   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2427   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2428   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2429   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2430 
2431   process_group_type pg;
2432   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2433   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2434   lgraph_type& level_graph = *lgraph_p;
2435   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2436 
2437   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2438   ilu_permuted(level_graph);
2439 
2440   /* put together the new matrix */
2441   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2442   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2443   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2444   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2445   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2446   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2447   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2448   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2449 
2450   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2451   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2452   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2453   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2454   PetscFunctionReturn(0);
2455 }
2456 
2457 #undef __FUNCT__
2458 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2459 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2460 {
2461   PetscFunctionBegin;
2462   PetscFunctionReturn(0);
2463 }
2464 
2465 #undef __FUNCT__
2466 #define __FUNCT__ "MatSolve_MPIAIJ"
2467 /*
2468   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2469 */
2470 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2471 {
2472   namespace graph_dist = boost::graph::distributed;
2473 
2474   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2475   lgraph_type    *lgraph_p;
2476   PetscContainer c;
2477   PetscErrorCode ierr;
2478 
2479   PetscFunctionBegin;
2480   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2481   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2482   ierr = VecCopy(b, x);CHKERRQ(ierr);
2483 
2484   PetscScalar *array_x;
2485   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2486   PetscInt sx;
2487   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2488 
2489   PetscScalar *array_b;
2490   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2491   PetscInt sb;
2492   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2493 
2494   lgraph_type& level_graph = *lgraph_p;
2495   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2496 
2497   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2498   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2499   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2500 
2501   typedef boost::iterator_property_map<array_ref_type::iterator,
2502                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2503   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2504   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2505 
2506   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2507   PetscFunctionReturn(0);
2508 }
2509 #endif
2510 
2511 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2512   PetscInt    nzlocal,nsends,nrecvs;
2513   PetscMPIInt *send_rank,*recv_rank;
2514   PetscInt    *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2515   PetscScalar *sbuf_a,**rbuf_a;
2516   PetscErrorCode (*Destroy)(Mat);
2517 } Mat_Redundant;
2518 
2519 #undef __FUNCT__
2520 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2521 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2522 {
2523   PetscErrorCode ierr;
2524   Mat_Redundant  *redund=(Mat_Redundant*)ptr;
2525   PetscInt       i;
2526 
2527   PetscFunctionBegin;
2528   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2529   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2530   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2531   for (i=0; i<redund->nrecvs; i++) {
2532     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2533     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2534   }
2535   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2536   ierr = PetscFree(redund);CHKERRQ(ierr);
2537   PetscFunctionReturn(0);
2538 }
2539 
2540 #undef __FUNCT__
2541 #define __FUNCT__ "MatDestroy_MatRedundant"
2542 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2543 {
2544   PetscErrorCode ierr;
2545   PetscContainer container;
2546   Mat_Redundant  *redund=NULL;
2547 
2548   PetscFunctionBegin;
2549   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
2550   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2551   ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
2552 
2553   A->ops->destroy = redund->Destroy;
2554 
2555   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2556   if (A->ops->destroy) {
2557     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2558   }
2559   PetscFunctionReturn(0);
2560 }
2561 
2562 #undef __FUNCT__
2563 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2564 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2565 {
2566   PetscMPIInt    rank,size;
2567   MPI_Comm       comm;
2568   PetscErrorCode ierr;
2569   PetscInt       nsends    = 0,nrecvs=0,i,rownz_max=0;
2570   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL;
2571   PetscInt       *rowrange = mat->rmap->range;
2572   Mat_MPIAIJ     *aij      = (Mat_MPIAIJ*)mat->data;
2573   Mat            A         = aij->A,B=aij->B,C=*matredundant;
2574   Mat_SeqAIJ     *a        = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2575   PetscScalar    *sbuf_a;
2576   PetscInt       nzlocal=a->nz+b->nz;
2577   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2578   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2579   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2580   MatScalar      *aworkA,*aworkB;
2581   PetscScalar    *vals;
2582   PetscMPIInt    tag1,tag2,tag3,imdex;
2583   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2584   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2585   MPI_Status     recv_status,*send_status;
2586   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2587   PetscInt       **rbuf_j=NULL;
2588   PetscScalar    **rbuf_a=NULL;
2589   Mat_Redundant  *redund =NULL;
2590   PetscContainer container;
2591 
2592   PetscFunctionBegin;
2593   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2594   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2595   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2596 
2597   if (reuse == MAT_REUSE_MATRIX) {
2598     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2599     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2600     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2601     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2602     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
2603     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2604     ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
2605     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2606 
2607     nsends    = redund->nsends;
2608     nrecvs    = redund->nrecvs;
2609     send_rank = redund->send_rank;
2610     recv_rank = redund->recv_rank;
2611     sbuf_nz   = redund->sbuf_nz;
2612     rbuf_nz   = redund->rbuf_nz;
2613     sbuf_j    = redund->sbuf_j;
2614     sbuf_a    = redund->sbuf_a;
2615     rbuf_j    = redund->rbuf_j;
2616     rbuf_a    = redund->rbuf_a;
2617   }
2618 
2619   if (reuse == MAT_INITIAL_MATRIX) {
2620     PetscMPIInt subrank,subsize;
2621     PetscInt    nleftover,np_subcomm;
2622     /* get the destination processors' id send_rank, nsends and nrecvs */
2623     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2624     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2625     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2626 
2627     np_subcomm = size/nsubcomm;
2628     nleftover  = size - nsubcomm*np_subcomm;
2629 
2630     nsends = 0; nrecvs = 0;
2631     for (i=0; i<size; i++) { /* i=rank*/
2632       if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */
2633         send_rank[nsends]   = i; nsends++;
2634         recv_rank[nrecvs++] = i;
2635       }
2636     }
2637     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2638       i = size-nleftover-1;
2639       j = 0;
2640       while (j < nsubcomm - nleftover) {
2641         send_rank[nsends++] = i;
2642         i--; j++;
2643       }
2644     }
2645 
2646     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2647       for (i=0; i<nleftover; i++) {
2648         recv_rank[nrecvs++] = size-nleftover+i;
2649       }
2650     }
2651 
2652     /* allocate sbuf_j, sbuf_a */
2653     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2654     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2655     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2656   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2657 
2658   /* copy mat's local entries into the buffers */
2659   if (reuse == MAT_INITIAL_MATRIX) {
2660     rownz_max = 0;
2661     rptr      = sbuf_j;
2662     cols      = sbuf_j + rend-rstart + 1;
2663     vals      = sbuf_a;
2664     rptr[0]   = 0;
2665     for (i=0; i<rend-rstart; i++) {
2666       row    = i + rstart;
2667       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2668       ncols  = nzA + nzB;
2669       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2670       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2671       /* load the column indices for this row into cols */
2672       lwrite = 0;
2673       for (l=0; l<nzB; l++) {
2674         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2675           vals[lwrite]   = aworkB[l];
2676           cols[lwrite++] = ctmp;
2677         }
2678       }
2679       for (l=0; l<nzA; l++) {
2680         vals[lwrite]   = aworkA[l];
2681         cols[lwrite++] = cstart + cworkA[l];
2682       }
2683       for (l=0; l<nzB; l++) {
2684         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2685           vals[lwrite]   = aworkB[l];
2686           cols[lwrite++] = ctmp;
2687         }
2688       }
2689       vals     += ncols;
2690       cols     += ncols;
2691       rptr[i+1] = rptr[i] + ncols;
2692       if (rownz_max < ncols) rownz_max = ncols;
2693     }
2694     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2695   } else { /* only copy matrix values into sbuf_a */
2696     rptr    = sbuf_j;
2697     vals    = sbuf_a;
2698     rptr[0] = 0;
2699     for (i=0; i<rend-rstart; i++) {
2700       row    = i + rstart;
2701       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2702       ncols  = nzA + nzB;
2703       cworkB = b->j + b->i[i];
2704       aworkA = a->a + a->i[i];
2705       aworkB = b->a + b->i[i];
2706       lwrite = 0;
2707       for (l=0; l<nzB; l++) {
2708         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2709       }
2710       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2711       for (l=0; l<nzB; l++) {
2712         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2713       }
2714       vals     += ncols;
2715       rptr[i+1] = rptr[i] + ncols;
2716     }
2717   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2718 
2719   /* send nzlocal to others, and recv other's nzlocal */
2720   /*--------------------------------------------------*/
2721   if (reuse == MAT_INITIAL_MATRIX) {
2722     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2723 
2724     s_waits2 = s_waits3 + nsends;
2725     s_waits1 = s_waits2 + nsends;
2726     r_waits1 = s_waits1 + nsends;
2727     r_waits2 = r_waits1 + nrecvs;
2728     r_waits3 = r_waits2 + nrecvs;
2729   } else {
2730     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2731 
2732     r_waits3 = s_waits3 + nsends;
2733   }
2734 
2735   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2736   if (reuse == MAT_INITIAL_MATRIX) {
2737     /* get new tags to keep the communication clean */
2738     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2739     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2740     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2741 
2742     /* post receives of other's nzlocal */
2743     for (i=0; i<nrecvs; i++) {
2744       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2745     }
2746     /* send nzlocal to others */
2747     for (i=0; i<nsends; i++) {
2748       sbuf_nz[i] = nzlocal;
2749       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2750     }
2751     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2752     count = nrecvs;
2753     while (count) {
2754       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2755 
2756       recv_rank[imdex] = recv_status.MPI_SOURCE;
2757       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2758       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2759 
2760       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2761 
2762       rbuf_nz[imdex] += i + 2;
2763 
2764       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2765       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2766       count--;
2767     }
2768     /* wait on sends of nzlocal */
2769     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2770     /* send mat->i,j to others, and recv from other's */
2771     /*------------------------------------------------*/
2772     for (i=0; i<nsends; i++) {
2773       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2774       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2775     }
2776     /* wait on receives of mat->i,j */
2777     /*------------------------------*/
2778     count = nrecvs;
2779     while (count) {
2780       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2781       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2782       count--;
2783     }
2784     /* wait on sends of mat->i,j */
2785     /*---------------------------*/
2786     if (nsends) {
2787       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2788     }
2789   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2790 
2791   /* post receives, send and receive mat->a */
2792   /*----------------------------------------*/
2793   for (imdex=0; imdex<nrecvs; imdex++) {
2794     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2795   }
2796   for (i=0; i<nsends; i++) {
2797     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2798   }
2799   count = nrecvs;
2800   while (count) {
2801     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2802     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2803     count--;
2804   }
2805   if (nsends) {
2806     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2807   }
2808 
2809   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2810 
2811   /* create redundant matrix */
2812   /*-------------------------*/
2813   if (reuse == MAT_INITIAL_MATRIX) {
2814     /* compute rownz_max for preallocation */
2815     for (imdex=0; imdex<nrecvs; imdex++) {
2816       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2817       rptr = rbuf_j[imdex];
2818       for (i=0; i<j; i++) {
2819         ncols = rptr[i+1] - rptr[i];
2820         if (rownz_max < ncols) rownz_max = ncols;
2821       }
2822     }
2823 
2824     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2825     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2826     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2827     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2828     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2829     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2830   } else {
2831     C = *matredundant;
2832   }
2833 
2834   /* insert local matrix entries */
2835   rptr = sbuf_j;
2836   cols = sbuf_j + rend-rstart + 1;
2837   vals = sbuf_a;
2838   for (i=0; i<rend-rstart; i++) {
2839     row   = i + rstart;
2840     ncols = rptr[i+1] - rptr[i];
2841     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2842     vals += ncols;
2843     cols += ncols;
2844   }
2845   /* insert received matrix entries */
2846   for (imdex=0; imdex<nrecvs; imdex++) {
2847     rstart = rowrange[recv_rank[imdex]];
2848     rend   = rowrange[recv_rank[imdex]+1];
2849     rptr   = rbuf_j[imdex];
2850     cols   = rbuf_j[imdex] + rend-rstart + 1;
2851     vals   = rbuf_a[imdex];
2852     for (i=0; i<rend-rstart; i++) {
2853       row   = i + rstart;
2854       ncols = rptr[i+1] - rptr[i];
2855       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2856       vals += ncols;
2857       cols += ncols;
2858     }
2859   }
2860   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2861   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2862   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2863   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2864   if (reuse == MAT_INITIAL_MATRIX) {
2865     PetscContainer container;
2866     *matredundant = C;
2867     /* create a supporting struct and attach it to C for reuse */
2868     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2869     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2870     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2871     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2872     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2873     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2874 
2875     redund->nzlocal   = nzlocal;
2876     redund->nsends    = nsends;
2877     redund->nrecvs    = nrecvs;
2878     redund->send_rank = send_rank;
2879     redund->recv_rank = recv_rank;
2880     redund->sbuf_nz   = sbuf_nz;
2881     redund->rbuf_nz   = rbuf_nz;
2882     redund->sbuf_j    = sbuf_j;
2883     redund->sbuf_a    = sbuf_a;
2884     redund->rbuf_j    = rbuf_j;
2885     redund->rbuf_a    = rbuf_a;
2886 
2887     redund->Destroy = C->ops->destroy;
2888     C->ops->destroy = MatDestroy_MatRedundant;
2889   }
2890   PetscFunctionReturn(0);
2891 }
2892 
2893 #undef __FUNCT__
2894 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2895 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2896 {
2897   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2898   PetscErrorCode ierr;
2899   PetscInt       i,*idxb = 0;
2900   PetscScalar    *va,*vb;
2901   Vec            vtmp;
2902 
2903   PetscFunctionBegin;
2904   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2905   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2906   if (idx) {
2907     for (i=0; i<A->rmap->n; i++) {
2908       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2909     }
2910   }
2911 
2912   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2913   if (idx) {
2914     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2915   }
2916   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2917   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2918 
2919   for (i=0; i<A->rmap->n; i++) {
2920     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2921       va[i] = vb[i];
2922       if (idx) idx[i] = a->garray[idxb[i]];
2923     }
2924   }
2925 
2926   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2927   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2928   ierr = PetscFree(idxb);CHKERRQ(ierr);
2929   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2930   PetscFunctionReturn(0);
2931 }
2932 
2933 #undef __FUNCT__
2934 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2935 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2936 {
2937   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2938   PetscErrorCode ierr;
2939   PetscInt       i,*idxb = 0;
2940   PetscScalar    *va,*vb;
2941   Vec            vtmp;
2942 
2943   PetscFunctionBegin;
2944   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2945   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2946   if (idx) {
2947     for (i=0; i<A->rmap->n; i++) {
2948       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2949     }
2950   }
2951 
2952   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2953   if (idx) {
2954     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2955   }
2956   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2957   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2958 
2959   for (i=0; i<A->rmap->n; i++) {
2960     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2961       va[i] = vb[i];
2962       if (idx) idx[i] = a->garray[idxb[i]];
2963     }
2964   }
2965 
2966   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2967   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2968   ierr = PetscFree(idxb);CHKERRQ(ierr);
2969   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2970   PetscFunctionReturn(0);
2971 }
2972 
2973 #undef __FUNCT__
2974 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2975 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2976 {
2977   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2978   PetscInt       n      = A->rmap->n;
2979   PetscInt       cstart = A->cmap->rstart;
2980   PetscInt       *cmap  = mat->garray;
2981   PetscInt       *diagIdx, *offdiagIdx;
2982   Vec            diagV, offdiagV;
2983   PetscScalar    *a, *diagA, *offdiagA;
2984   PetscInt       r;
2985   PetscErrorCode ierr;
2986 
2987   PetscFunctionBegin;
2988   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2989   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2990   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2991   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2992   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2993   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2994   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2995   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2996   for (r = 0; r < n; ++r) {
2997     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2998       a[r]   = diagA[r];
2999       idx[r] = cstart + diagIdx[r];
3000     } else {
3001       a[r]   = offdiagA[r];
3002       idx[r] = cmap[offdiagIdx[r]];
3003     }
3004   }
3005   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3006   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3007   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3008   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3009   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3010   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3011   PetscFunctionReturn(0);
3012 }
3013 
3014 #undef __FUNCT__
3015 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3016 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3017 {
3018   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3019   PetscInt       n      = A->rmap->n;
3020   PetscInt       cstart = A->cmap->rstart;
3021   PetscInt       *cmap  = mat->garray;
3022   PetscInt       *diagIdx, *offdiagIdx;
3023   Vec            diagV, offdiagV;
3024   PetscScalar    *a, *diagA, *offdiagA;
3025   PetscInt       r;
3026   PetscErrorCode ierr;
3027 
3028   PetscFunctionBegin;
3029   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3030   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3031   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3032   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3033   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3034   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3035   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3036   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3037   for (r = 0; r < n; ++r) {
3038     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3039       a[r]   = diagA[r];
3040       idx[r] = cstart + diagIdx[r];
3041     } else {
3042       a[r]   = offdiagA[r];
3043       idx[r] = cmap[offdiagIdx[r]];
3044     }
3045   }
3046   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3047   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3048   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3049   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3050   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3051   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3052   PetscFunctionReturn(0);
3053 }
3054 
3055 #undef __FUNCT__
3056 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3057 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3058 {
3059   PetscErrorCode ierr;
3060   Mat            *dummy;
3061 
3062   PetscFunctionBegin;
3063   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3064   *newmat = *dummy;
3065   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3066   PetscFunctionReturn(0);
3067 }
3068 
3069 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3070 
3071 #undef __FUNCT__
3072 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3073 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3074 {
3075   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3076   PetscErrorCode ierr;
3077 
3078   PetscFunctionBegin;
3079   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3080   PetscFunctionReturn(0);
3081 }
3082 
3083 #undef __FUNCT__
3084 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3085 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3086 {
3087   PetscErrorCode ierr;
3088   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3089 
3090   PetscFunctionBegin;
3091   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3092   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3093   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3094   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3095   PetscFunctionReturn(0);
3096 }
3097 
3098 /* -------------------------------------------------------------------*/
3099 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3100        MatGetRow_MPIAIJ,
3101        MatRestoreRow_MPIAIJ,
3102        MatMult_MPIAIJ,
3103 /* 4*/ MatMultAdd_MPIAIJ,
3104        MatMultTranspose_MPIAIJ,
3105        MatMultTransposeAdd_MPIAIJ,
3106 #if defined(PETSC_HAVE_PBGL)
3107        MatSolve_MPIAIJ,
3108 #else
3109        0,
3110 #endif
3111        0,
3112        0,
3113 /*10*/ 0,
3114        0,
3115        0,
3116        MatSOR_MPIAIJ,
3117        MatTranspose_MPIAIJ,
3118 /*15*/ MatGetInfo_MPIAIJ,
3119        MatEqual_MPIAIJ,
3120        MatGetDiagonal_MPIAIJ,
3121        MatDiagonalScale_MPIAIJ,
3122        MatNorm_MPIAIJ,
3123 /*20*/ MatAssemblyBegin_MPIAIJ,
3124        MatAssemblyEnd_MPIAIJ,
3125        MatSetOption_MPIAIJ,
3126        MatZeroEntries_MPIAIJ,
3127 /*24*/ MatZeroRows_MPIAIJ,
3128        0,
3129 #if defined(PETSC_HAVE_PBGL)
3130        0,
3131 #else
3132        0,
3133 #endif
3134        0,
3135        0,
3136 /*29*/ MatSetUp_MPIAIJ,
3137 #if defined(PETSC_HAVE_PBGL)
3138        0,
3139 #else
3140        0,
3141 #endif
3142        0,
3143        0,
3144        0,
3145 /*34*/ MatDuplicate_MPIAIJ,
3146        0,
3147        0,
3148        0,
3149        0,
3150 /*39*/ MatAXPY_MPIAIJ,
3151        MatGetSubMatrices_MPIAIJ,
3152        MatIncreaseOverlap_MPIAIJ,
3153        MatGetValues_MPIAIJ,
3154        MatCopy_MPIAIJ,
3155 /*44*/ MatGetRowMax_MPIAIJ,
3156        MatScale_MPIAIJ,
3157        0,
3158        0,
3159        MatZeroRowsColumns_MPIAIJ,
3160 /*49*/ MatSetRandom_MPIAIJ,
3161        0,
3162        0,
3163        0,
3164        0,
3165 /*54*/ MatFDColoringCreate_MPIAIJ,
3166        0,
3167        MatSetUnfactored_MPIAIJ,
3168        MatPermute_MPIAIJ,
3169        0,
3170 /*59*/ MatGetSubMatrix_MPIAIJ,
3171        MatDestroy_MPIAIJ,
3172        MatView_MPIAIJ,
3173        0,
3174        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3175 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3176        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3177        0,
3178        0,
3179        0,
3180 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3181        MatGetRowMinAbs_MPIAIJ,
3182        0,
3183        MatSetColoring_MPIAIJ,
3184        0,
3185        MatSetValuesAdifor_MPIAIJ,
3186 /*75*/ MatFDColoringApply_AIJ,
3187        0,
3188        0,
3189        0,
3190        MatFindZeroDiagonals_MPIAIJ,
3191 /*80*/ 0,
3192        0,
3193        0,
3194 /*83*/ MatLoad_MPIAIJ,
3195        0,
3196        0,
3197        0,
3198        0,
3199        0,
3200 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3201        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3202        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3203        MatPtAP_MPIAIJ_MPIAIJ,
3204        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3205 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3206        0,
3207        0,
3208        0,
3209        0,
3210 /*99*/ 0,
3211        0,
3212        0,
3213        MatConjugate_MPIAIJ,
3214        0,
3215 /*104*/MatSetValuesRow_MPIAIJ,
3216        MatRealPart_MPIAIJ,
3217        MatImaginaryPart_MPIAIJ,
3218        0,
3219        0,
3220 /*109*/0,
3221        MatGetRedundantMatrix_MPIAIJ,
3222        MatGetRowMin_MPIAIJ,
3223        0,
3224        0,
3225 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3226        0,
3227        0,
3228        0,
3229        0,
3230 /*119*/0,
3231        0,
3232        0,
3233        0,
3234        MatGetMultiProcBlock_MPIAIJ,
3235 /*124*/MatFindNonzeroRows_MPIAIJ,
3236        MatGetColumnNorms_MPIAIJ,
3237        MatInvertBlockDiagonal_MPIAIJ,
3238        0,
3239        MatGetSubMatricesParallel_MPIAIJ,
3240 /*129*/0,
3241        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3242        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3243        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3244        0,
3245 /*134*/0,
3246        0,
3247        0,
3248        0,
3249        0
3250 };
3251 
3252 /* ----------------------------------------------------------------------------------------*/
3253 
3254 EXTERN_C_BEGIN
3255 #undef __FUNCT__
3256 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3257 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3258 {
3259   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3260   PetscErrorCode ierr;
3261 
3262   PetscFunctionBegin;
3263   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3264   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3265   PetscFunctionReturn(0);
3266 }
3267 EXTERN_C_END
3268 
3269 EXTERN_C_BEGIN
3270 #undef __FUNCT__
3271 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3272 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3273 {
3274   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3275   PetscErrorCode ierr;
3276 
3277   PetscFunctionBegin;
3278   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3279   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3280   PetscFunctionReturn(0);
3281 }
3282 EXTERN_C_END
3283 
3284 EXTERN_C_BEGIN
3285 #undef __FUNCT__
3286 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3287 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3288 {
3289   Mat_MPIAIJ     *b;
3290   PetscErrorCode ierr;
3291   PetscInt       i;
3292   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3293 
3294   PetscFunctionBegin;
3295   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3296   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3297   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3298   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3299   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3300   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3301 
3302   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3303   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3304   if (d_nnz) {
3305     for (i=0; i<B->rmap->n; i++) {
3306       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3307     }
3308   }
3309   if (o_nnz) {
3310     for (i=0; i<B->rmap->n; i++) {
3311       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3312     }
3313   }
3314   b = (Mat_MPIAIJ*)B->data;
3315 
3316   if (!B->preallocated) {
3317     /* Explicitly create 2 MATSEQAIJ matrices. */
3318     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3319     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3320     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3321     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3322     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3323     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3324     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3325     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3326     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3327     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3328   }
3329 
3330   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3331   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3332   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3333   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3334   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3335   B->preallocated = PETSC_TRUE;
3336   PetscFunctionReturn(0);
3337 }
3338 EXTERN_C_END
3339 
3340 #undef __FUNCT__
3341 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3342 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3343 {
3344   Mat            mat;
3345   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3346   PetscErrorCode ierr;
3347 
3348   PetscFunctionBegin;
3349   *newmat = 0;
3350   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3351   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3352   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3353   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3354   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3355   a       = (Mat_MPIAIJ*)mat->data;
3356 
3357   mat->factortype   = matin->factortype;
3358   mat->rmap->bs     = matin->rmap->bs;
3359   mat->cmap->bs     = matin->cmap->bs;
3360   mat->assembled    = PETSC_TRUE;
3361   mat->insertmode   = NOT_SET_VALUES;
3362   mat->preallocated = PETSC_TRUE;
3363 
3364   a->size         = oldmat->size;
3365   a->rank         = oldmat->rank;
3366   a->donotstash   = oldmat->donotstash;
3367   a->roworiented  = oldmat->roworiented;
3368   a->rowindices   = 0;
3369   a->rowvalues    = 0;
3370   a->getrowactive = PETSC_FALSE;
3371 
3372   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3373   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3374 
3375   if (oldmat->colmap) {
3376 #if defined(PETSC_USE_CTABLE)
3377     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3378 #else
3379     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3380     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3381     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3382 #endif
3383   } else a->colmap = 0;
3384   if (oldmat->garray) {
3385     PetscInt len;
3386     len  = oldmat->B->cmap->n;
3387     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3388     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3389     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3390   } else a->garray = 0;
3391 
3392   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3393   ierr    = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3394   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3395   ierr    = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3396   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3397   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3398   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3399   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3400   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3401   *newmat = mat;
3402   PetscFunctionReturn(0);
3403 }
3404 
3405 
3406 
3407 #undef __FUNCT__
3408 #define __FUNCT__ "MatLoad_MPIAIJ"
3409 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3410 {
3411   PetscScalar    *vals,*svals;
3412   MPI_Comm       comm;
3413   PetscErrorCode ierr;
3414   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3415   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3416   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3417   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3418   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3419   int            fd;
3420   PetscInt       bs = 1;
3421 
3422   PetscFunctionBegin;
3423   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3424   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3425   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3426   if (!rank) {
3427     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3428     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3429     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3430   }
3431 
3432   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3433   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3434   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3435 
3436   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3437 
3438   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3439   M    = header[1]; N = header[2];
3440   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3441   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3442   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3443 
3444   /* If global sizes are set, check if they are consistent with that given in the file */
3445   if (sizesset) {
3446     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3447   }
3448   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3449   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3450 
3451   /* determine ownership of all (block) rows */
3452   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3453   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3454   else m = newMat->rmap->n; /* Set by user */
3455 
3456   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3457   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3458 
3459   /* First process needs enough room for process with most rows */
3460   if (!rank) {
3461     mmax = rowners[1];
3462     for (i=2; i<=size; i++) {
3463       mmax = PetscMax(mmax, rowners[i]);
3464     }
3465   } else mmax = m;
3466 
3467   rowners[0] = 0;
3468   for (i=2; i<=size; i++) {
3469     rowners[i] += rowners[i-1];
3470   }
3471   rstart = rowners[rank];
3472   rend   = rowners[rank+1];
3473 
3474   /* distribute row lengths to all processors */
3475   ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3476   if (!rank) {
3477     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3478     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3479     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3480     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3481     for (j=0; j<m; j++) {
3482       procsnz[0] += ourlens[j];
3483     }
3484     for (i=1; i<size; i++) {
3485       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3486       /* calculate the number of nonzeros on each processor */
3487       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3488         procsnz[i] += rowlengths[j];
3489       }
3490       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3491     }
3492     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3493   } else {
3494     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3495   }
3496 
3497   if (!rank) {
3498     /* determine max buffer needed and allocate it */
3499     maxnz = 0;
3500     for (i=0; i<size; i++) {
3501       maxnz = PetscMax(maxnz,procsnz[i]);
3502     }
3503     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3504 
3505     /* read in my part of the matrix column indices  */
3506     nz   = procsnz[0];
3507     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3508     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3509 
3510     /* read in everyone else's rows and ship them off */
3511     for (i=1; i<size; i++) {
3512       nz   = procsnz[i];
3513       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3514       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3515     }
3516     ierr = PetscFree(cols);CHKERRQ(ierr);
3517   } else {
3518     /* determine buffer space needed for message */
3519     nz = 0;
3520     for (i=0; i<m; i++) {
3521       nz += ourlens[i];
3522     }
3523     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3524 
3525     /* receive message of column indices*/
3526     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3527   }
3528 
3529   /* determine column ownership if matrix is not square */
3530   if (N != M) {
3531     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3532     else n = newMat->cmap->n;
3533     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3534     cstart = cend - n;
3535   } else {
3536     cstart = rstart;
3537     cend   = rend;
3538     n      = cend - cstart;
3539   }
3540 
3541   /* loop over local rows, determining number of off diagonal entries */
3542   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3543   jj   = 0;
3544   for (i=0; i<m; i++) {
3545     for (j=0; j<ourlens[i]; j++) {
3546       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3547       jj++;
3548     }
3549   }
3550 
3551   for (i=0; i<m; i++) {
3552     ourlens[i] -= offlens[i];
3553   }
3554   if (!sizesset) {
3555     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3556   }
3557 
3558   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3559 
3560   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3561 
3562   for (i=0; i<m; i++) {
3563     ourlens[i] += offlens[i];
3564   }
3565 
3566   if (!rank) {
3567     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3568 
3569     /* read in my part of the matrix numerical values  */
3570     nz   = procsnz[0];
3571     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3572 
3573     /* insert into matrix */
3574     jj      = rstart;
3575     smycols = mycols;
3576     svals   = vals;
3577     for (i=0; i<m; i++) {
3578       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3579       smycols += ourlens[i];
3580       svals   += ourlens[i];
3581       jj++;
3582     }
3583 
3584     /* read in other processors and ship out */
3585     for (i=1; i<size; i++) {
3586       nz   = procsnz[i];
3587       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3588       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3589     }
3590     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3591   } else {
3592     /* receive numeric values */
3593     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3594 
3595     /* receive message of values*/
3596     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3597 
3598     /* insert into matrix */
3599     jj      = rstart;
3600     smycols = mycols;
3601     svals   = vals;
3602     for (i=0; i<m; i++) {
3603       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3604       smycols += ourlens[i];
3605       svals   += ourlens[i];
3606       jj++;
3607     }
3608   }
3609   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3610   ierr = PetscFree(vals);CHKERRQ(ierr);
3611   ierr = PetscFree(mycols);CHKERRQ(ierr);
3612   ierr = PetscFree(rowners);CHKERRQ(ierr);
3613   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3614   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3615   PetscFunctionReturn(0);
3616 }
3617 
3618 #undef __FUNCT__
3619 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3620 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3621 {
3622   PetscErrorCode ierr;
3623   IS             iscol_local;
3624   PetscInt       csize;
3625 
3626   PetscFunctionBegin;
3627   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3628   if (call == MAT_REUSE_MATRIX) {
3629     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3630     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3631   } else {
3632     PetscInt cbs;
3633     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3634     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3635     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3636   }
3637   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3638   if (call == MAT_INITIAL_MATRIX) {
3639     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3640     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3641   }
3642   PetscFunctionReturn(0);
3643 }
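
/*
   A minimal sketch (illustrative only, not code from this file) of how a caller
   drives the reuse protocol implemented above: the first call builds the submatrix
   and caches the allgathered column index set on it, and later calls with
   MAT_REUSE_MATRIX refresh the same submatrix in place.  Here A is assumed to be an
   assembled MATMPIAIJ matrix and isrow, iscol index sets on its communicator:

     Mat sub;
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ...  change the numerical values of A, keeping its nonzero pattern  ...
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
     ierr = MatDestroy(&sub);CHKERRQ(ierr);
*/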
3644 
3645 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3646 #undef __FUNCT__
3647 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3648 /*
3649     Not great since it makes two copies of the submatrix: first a SeqAIJ
3650   submatrix on each process, and then the end result by concatenating the local matrices.
3651   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3652 
3653   Note: This requires a sequential iscol with all indices.
3654 */
3655 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3656 {
3657   PetscErrorCode ierr;
3658   PetscMPIInt    rank,size;
3659   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3660   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3661   PetscBool      allcolumns, colflag;
3662   Mat            M,Mreuse;
3663   MatScalar      *vwork,*aa;
3664   MPI_Comm       comm;
3665   Mat_SeqAIJ     *aij;
3666 
3667   PetscFunctionBegin;
3668   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3669   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3670   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3671 
3672   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3673   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3674   if (colflag && ncol == mat->cmap->N) {
3675     allcolumns = PETSC_TRUE;
3676   } else {
3677     allcolumns = PETSC_FALSE;
3678   }
3679   if (call ==  MAT_REUSE_MATRIX) {
3680     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3681     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3682     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3683   } else {
3684     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3685   }
3686 
3687   /*
3688       m - number of local rows
3689       n - number of columns (same on all processors)
3690       rstart - first row in new global matrix generated
3691   */
3692   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3693   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3694   if (call == MAT_INITIAL_MATRIX) {
3695     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3696     ii  = aij->i;
3697     jj  = aij->j;
3698 
3699     /*
3700         Determine the number of non-zeros in the diagonal and off-diagonal
3701         portions of the matrix in order to do correct preallocation
3702     */
3703 
3704     /* first get start and end of "diagonal" columns */
3705     if (csize == PETSC_DECIDE) {
3706       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3707       if (mglobal == n) { /* square matrix */
3708         nlocal = m;
3709       } else {
3710         nlocal = n/size + ((n % size) > rank);
3711       }
3712     } else {
3713       nlocal = csize;
3714     }
3715     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3716     rstart = rend - nlocal;
3717     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3718 
3719     /* next, compute all the lengths */
3720     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3721     olens = dlens + m;
3722     for (i=0; i<m; i++) {
3723       jend = ii[i+1] - ii[i];
3724       olen = 0;
3725       dlen = 0;
3726       for (j=0; j<jend; j++) {
3727         if (*jj < rstart || *jj >= rend) olen++;
3728         else dlen++;
3729         jj++;
3730       }
3731       olens[i] = olen;
3732       dlens[i] = dlen;
3733     }
3734     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3735     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3736     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3737     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3738     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3739     ierr = PetscFree(dlens);CHKERRQ(ierr);
3740   } else {
3741     PetscInt ml,nl;
3742 
3743     M    = *newmat;
3744     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3745     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3746     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3747     /*
3748          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3749        rather than the slower MatSetValues().
3750     */
3751     M->was_assembled = PETSC_TRUE;
3752     M->assembled     = PETSC_FALSE;
3753   }
3754   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3755   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3756   ii   = aij->i;
3757   jj   = aij->j;
3758   aa   = aij->a;
3759   for (i=0; i<m; i++) {
3760     row   = rstart + i;
3761     nz    = ii[i+1] - ii[i];
3762     cwork = jj;     jj += nz;
3763     vwork = aa;     aa += nz;
3764     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3765   }
3766 
3767   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3768   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3769   *newmat = M;
3770 
3771   /* save submatrix used in processor for next request */
3772   if (call ==  MAT_INITIAL_MATRIX) {
3773     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3774     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3775   }
3776   PetscFunctionReturn(0);
3777 }
3778 
3779 EXTERN_C_BEGIN
3780 #undef __FUNCT__
3781 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3782 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3783 {
3784   PetscInt       m,cstart, cend,j,nnz,i,d;
3785   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3786   const PetscInt *JJ;
3787   PetscScalar    *values;
3788   PetscErrorCode ierr;
3789 
3790   PetscFunctionBegin;
3791   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3792 
3793   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3794   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3795   m      = B->rmap->n;
3796   cstart = B->cmap->rstart;
3797   cend   = B->cmap->rend;
3798   rstart = B->rmap->rstart;
3799 
3800   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3801 
3802 #if defined(PETSC_USE_DEBUG)
3803   for (i=0; i<m; i++) {
3804     nnz = Ii[i+1]- Ii[i];
3805     JJ  = J + Ii[i];
3806     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3807     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3808     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3809   }
3810 #endif
3811 
3812   for (i=0; i<m; i++) {
3813     nnz     = Ii[i+1]- Ii[i];
3814     JJ      = J + Ii[i];
3815     nnz_max = PetscMax(nnz_max,nnz);
3816     d       = 0;
3817     for (j=0; j<nnz; j++) {
3818       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3819     }
3820     d_nnz[i] = d;
3821     o_nnz[i] = nnz - d;
3822   }
3823   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3824   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3825 
3826   if (v) values = (PetscScalar*)v;
3827   else {
3828     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3829     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3830   }
3831 
3832   for (i=0; i<m; i++) {
3833     ii   = i + rstart;
3834     nnz  = Ii[i+1]- Ii[i];
3835     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3836   }
3837   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3838   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3839 
3840   if (!v) {
3841     ierr = PetscFree(values);CHKERRQ(ierr);
3842   }
3843   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3844   PetscFunctionReturn(0);
3845 }
3846 EXTERN_C_END
3847 
3848 #undef __FUNCT__
3849 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3850 /*@
3851    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3852    (the default parallel PETSc format).
3853 
3854    Collective on MPI_Comm
3855 
3856    Input Parameters:
3857 +  B - the matrix
3858 .  i - the indices into j for the start of each local row (starts with zero)
3859 .  j - the column indices for each local row (starts with zero)
3860 -  v - optional values in the matrix
3861 
3862    Level: developer
3863 
3864    Notes:
3865        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3866      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3867      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3868 
3869        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3870 
3871        The format used for the sparse matrix input is equivalent to a
3872     row-major ordering, i.e. for the following matrix, the input data expected is
3873     as shown (a minimal calling sketch follows the example):
3874 
3875         1 0 0
3876         2 0 3     P0
3877        -------
3878         4 5 6     P1
3879 
3880      Process0 [P0]: rows_owned=[0,1]
3881         i =  {0,1,3}  [size = nrow+1  = 2+1]
3882         j =  {0,0,2}  [size = nz = 3]
3883         v =  {1,2,3}  [size = nz = 3]
3884 
3885      Process1 [P1]: rows_owned=[2]
3886         i =  {0,3}    [size = nrow+1  = 1+1]
3887         j =  {0,1,2}  [size = nz = 3]
3888         v =  {4,5,6}  [size = nz = 3]
3889 
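     A minimal calling sketch consistent with the example above (illustrative only;
     it assumes a 3x3 matrix over two MPI processes, with rank 0 owning rows 0-1 and
     rank 1 owning row 2, where nlocal below stands for the number of locally owned
     rows; error checking is omitted):

.vb
     on rank 0:   PetscInt i[] = {0,1,3}, j[] = {0,0,2};  PetscScalar v[] = {1,2,3};
     on rank 1:   PetscInt i[] = {0,3},   j[] = {0,1,2};  PetscScalar v[] = {4,5,6};

     on all ranks:
        MatCreate(PETSC_COMM_WORLD,&B);
        MatSetSizes(B,nlocal,PETSC_DECIDE,3,3);
        MatSetType(B,MATMPIAIJ);
        MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
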
3890 .keywords: matrix, aij, compressed row, sparse, parallel
3891 
3892 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3893           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3894 @*/
3895 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3896 {
3897   PetscErrorCode ierr;
3898 
3899   PetscFunctionBegin;
3900   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3901   PetscFunctionReturn(0);
3902 }
3903 
3904 #undef __FUNCT__
3905 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3906 /*@C
3907    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3908    (the default parallel PETSc format).  For good matrix assembly performance
3909    the user should preallocate the matrix storage by setting the parameters
3910    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3911    performance can be increased by more than a factor of 50.
3912 
3913    Collective on MPI_Comm
3914 
3915    Input Parameters:
3916 +  A - the matrix
3917 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3918            (same value is used for all local rows)
3919 .  d_nnz - array containing the number of nonzeros in the various rows of the
3920            DIAGONAL portion of the local submatrix (possibly different for each row)
3921            or NULL, if d_nz is used to specify the nonzero structure.
3922            The size of this array is equal to the number of local rows, i.e 'm'.
3923            For matrices that will be factored, you must leave room for (and set)
3924            the diagonal entry even if it is zero.
3925 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3926            submatrix (same value is used for all local rows).
3927 -  o_nnz - array containing the number of nonzeros in the various rows of the
3928            OFF-DIAGONAL portion of the local submatrix (possibly different for
3929            each row) or NULL, if o_nz is used to specify the nonzero
3930            structure. The size of this array is equal to the number
3931            of local rows, i.e 'm'.
3932 
3933    If the *_nnz parameter is given then the *_nz parameter is ignored
3934 
3935    The AIJ format (also called the Yale sparse matrix format or
3936    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3937    storage.  The stored row and column indices begin with zero.
3938    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3939 
3940    The parallel matrix is partitioned such that the first m0 rows belong to
3941    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3942    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3943 
3944    The DIAGONAL portion of the local submatrix of a processor can be defined
3945    as the submatrix which is obtained by extracting the part corresponding to
3946    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3947    first row that belongs to the processor, r2 is the last row belonging to
3948    this processor, and c1-c2 is the range of indices of the local part of a
3949    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3950    common case of a square matrix, the row and column ranges are the same and
3951    the DIAGONAL part is also square. The remaining portion of the local
3952    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3953 
3954    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3955 
3956    You can call MatGetInfo() to get information on how effective the preallocation was;
3957    for example, look at the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3958    You can also run with the option -info and look for messages with the string
3959    malloc in them to see if additional memory allocation was needed.
3960 
3961    Example usage:
3962 
3963    Consider the following 8x8 matrix with 34 non-zero values, that is
3964    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3965    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3966    as follows:
3967 
3968 .vb
3969             1  2  0  |  0  3  0  |  0  4
3970     Proc0   0  5  6  |  7  0  0  |  8  0
3971             9  0 10  | 11  0  0  | 12  0
3972     -------------------------------------
3973            13  0 14  | 15 16 17  |  0  0
3974     Proc1   0 18  0  | 19 20 21  |  0  0
3975             0  0  0  | 22 23  0  | 24  0
3976     -------------------------------------
3977     Proc2  25 26 27  |  0  0 28  | 29  0
3978            30  0  0  | 31 32 33  |  0 34
3979 .ve
3980 
3981    This can be represented as a collection of submatrices as:
3982 
3983 .vb
3984       A B C
3985       D E F
3986       G H I
3987 .ve
3988 
3989    Where the submatrices A,B,C are owned by proc0, D,E,F are
3990    owned by proc1, G,H,I are owned by proc2.
3991 
3992    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3993    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3994    The 'M','N' parameters are 8,8, and have the same values on all procs.
3995 
3996    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3997    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3998    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3999    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4000    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4001    matrix, and [DF] as another SeqAIJ matrix.
4002 
4003    When the d_nz, o_nz parameters are specified, d_nz storage elements are
4004    allocated for every row of the local DIAGONAL submatrix, and o_nz
4005    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4006    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4007    local row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4008    In this case, the values of d_nz,o_nz are:
4009 .vb
4010      proc0 : d_nz = 2, o_nz = 2
4011      proc1 : d_nz = 3, o_nz = 2
4012      proc2 : d_nz = 1, o_nz = 4
4013 .ve
4014    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4015    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4016    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4017    34 values.
4018 
4019    When the d_nnz, o_nnz parameters are specified, the storage is specified
4020    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4021    In the above case the values for d_nnz,o_nnz are:
4022 .vb
4023      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4024      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4025      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4026 .ve
4027    Here the space allocated is the sum of all the above values, i.e., 34, and
4028    hence the preallocation is perfect.
4029 
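   A minimal sketch of the recommended calling sequence on proc0 of the example above, using the
   per-row counts d_nnz and o_nnz listed for proc0 (a sketch only; comm and ierr are assumed to be
   declared elsewhere). Setting the values with MatSetValues() and assembling the matrix would follow:

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
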
4030    Level: intermediate
4031 
4032 .keywords: matrix, aij, compressed row, sparse, parallel
4033 
4034 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4035           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4036 @*/
4037 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4038 {
4039   PetscErrorCode ierr;
4040 
4041   PetscFunctionBegin;
4042   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4043   PetscValidType(B,1);
4044   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4045   PetscFunctionReturn(0);
4046 }
4047 
4048 #undef __FUNCT__
4049 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4050 /*@
4051      MatCreateMPIAIJWithArrays - creates an MPIAIJ matrix using arrays that contain the local rows
4052          in standard CSR format.
4053 
4054    Collective on MPI_Comm
4055 
4056    Input Parameters:
4057 +  comm - MPI communicator
4058 .  m - number of local rows (Cannot be PETSC_DECIDE)
4059 .  n - This value should be the same as the local size used in creating the
4060        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4061        calculated if N is given) For square matrices n is almost always m.
4062 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4063 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4064 .   i - row indices
4065 .   j - column indices
4066 -   a - matrix values
4067 
4068    Output Parameter:
4069 .   mat - the matrix
4070 
4071    Level: intermediate
4072 
4073    Notes:
4074        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4075      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4076      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4077 
4078        The i and j indices are 0 based, and the i indices are offsets into the local j (and a) arrays.
4079 
4080        The format which is used for the sparse matrix input is equivalent to a
4081     row-major ordering, i.e., for the following matrix the expected input data is
4082     as shown:
4083 
4084         1 0 0
4085         2 0 3     P0
4086        -------
4087         4 5 6     P1
4088 
4089      Process0 [P0]: rows_owned=[0,1]
4090         i =  {0,1,3}  [size = nrow+1  = 2+1]
4091         j =  {0,0,2}  [size = local nz = 3]
4092         v =  {1,2,3}  [size = local nz = 3]
4093 
4094      Process1 [P1]: rows_owned=[2]
4095         i =  {0,3}    [size = nrow+1  = 1+1]
4096         j =  {0,1,2}  [size = local nz = 3]
4097         v =  {4,5,6}  [size = local nz = 3]
4098 
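     A minimal sketch of the call made on P1 of the example above; i, j, and v hold the local CSR row
     offsets, global column indices, and values listed for P1 (comm and ierr are assumed to be declared
     elsewhere):

.vb
     Mat         A;
     PetscInt    i[] = {0,3}, j[] = {0,1,2};
     PetscScalar v[] = {4.0,5.0,6.0};

     ierr = MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
.ve
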
4099 .keywords: matrix, aij, compressed row, sparse, parallel
4100 
4101 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4102           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4103 @*/
4104 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4105 {
4106   PetscErrorCode ierr;
4107 
4108   PetscFunctionBegin;
4109   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4110   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4111   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4112   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4113   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4114   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4115   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4116   PetscFunctionReturn(0);
4117 }
4118 
4119 #undef __FUNCT__
4120 #define __FUNCT__ "MatCreateAIJ"
4121 /*@C
4122    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4123    (the default parallel PETSc format).  For good matrix assembly performance
4124    the user should preallocate the matrix storage by setting the parameters
4125    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4126    performance can be increased by more than a factor of 50.
4127 
4128    Collective on MPI_Comm
4129 
4130    Input Parameters:
4131 +  comm - MPI communicator
4132 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4133            This value should be the same as the local size used in creating the
4134            y vector for the matrix-vector product y = Ax.
4135 .  n - This value should be the same as the local size used in creating the
4136        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4137        calculated if N is given) For square matrices n is almost always m.
4138 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4139 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4140 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4141            (same value is used for all local rows)
4142 .  d_nnz - array containing the number of nonzeros in the various rows of the
4143            DIAGONAL portion of the local submatrix (possibly different for each row)
4144            or NULL, if d_nz is used to specify the nonzero structure.
4145            The size of this array is equal to the number of local rows, i.e., 'm'.
4146 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4147            submatrix (same value is used for all local rows).
4148 -  o_nnz - array containing the number of nonzeros in the various rows of the
4149            OFF-DIAGONAL portion of the local submatrix (possibly different for
4150            each row) or NULL, if o_nz is used to specify the nonzero
4151            structure. The size of this array is equal to the number
4152            of local rows, i.e., 'm'.
4153 
4154    Output Parameter:
4155 .  A - the matrix
4156 
4157    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4158    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4159    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4160 
4161    Notes:
4162    If the *_nnz parameter is given then the *_nz parameter is ignored
4163 
4164    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4165    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4166    storage requirements for this matrix.
4167 
4168    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4169    processor then it must be used on all processors that share the object for
4170    that argument.
4171 
4172    The user MUST specify either the local or global matrix dimensions
4173    (possibly both).
4174 
4175    The parallel matrix is partitioned across processors such that the
4176    first m0 rows belong to process 0, the next m1 rows belong to
4177    process 1, the next m2 rows belong to process 2, etc., where
4178    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4179    values corresponding to an [m x N] submatrix.
4180 
4181    The columns are logically partitioned with the n0 columns belonging
4182    to the 0th partition, the next n1 columns belonging to the next
4183    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4184 
4185    The DIAGONAL portion of the local submatrix on any given processor
4186    is the submatrix corresponding to the rows and columns m,n
4187    owned by the given processor, i.e., the diagonal submatrix on
4188    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4189    etc. The remaining portion of the local submatrix [m x (N-n)]
4190    constitutes the OFF-DIAGONAL portion. The example below better
4191    illustrates this concept.
4192 
4193    For a square global matrix we define each processor's diagonal portion
4194    to be its local rows and the corresponding columns (a square submatrix);
4195    each processor's off-diagonal portion encompasses the remainder of the
4196    local matrix (a rectangular submatrix).
4197 
4198    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4199 
4200    When calling this routine with a single process communicator, a matrix of
4201    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4202    type of communicator, use the construction mechanism:
4203      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4204 
4205    By default, this format uses inodes (identical nodes) when possible.
4206    We search for consecutive rows with the same nonzero structure, thereby
4207    reusing matrix information to achieve increased efficiency.
4208 
4209    Options Database Keys:
4210 +  -mat_no_inode  - Do not use inodes
4211 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4212 -  -mat_aij_oneindex - Internally use indexing starting at 1
4213         rather than 0.  Note that when calling MatSetValues(),
4214         the user still MUST index entries starting at 0!
4215 
4216 
4217    Example usage:
4218 
4219    Consider the following 8x8 matrix with 34 non-zero values, that is
4220    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4221    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4222    as follows:
4223 
4224 .vb
4225             1  2  0  |  0  3  0  |  0  4
4226     Proc0   0  5  6  |  7  0  0  |  8  0
4227             9  0 10  | 11  0  0  | 12  0
4228     -------------------------------------
4229            13  0 14  | 15 16 17  |  0  0
4230     Proc1   0 18  0  | 19 20 21  |  0  0
4231             0  0  0  | 22 23  0  | 24  0
4232     -------------------------------------
4233     Proc2  25 26 27  |  0  0 28  | 29  0
4234            30  0  0  | 31 32 33  |  0 34
4235 .ve
4236 
4237    This can be represented as a collection of submatrices as:
4238 
4239 .vb
4240       A B C
4241       D E F
4242       G H I
4243 .ve
4244 
4245    Where the submatrices A,B,C are owned by proc0, D,E,F are
4246    owned by proc1, G,H,I are owned by proc2.
4247 
4248    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4249    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4250    The 'M','N' parameters are 8,8, and have the same values on all procs.
4251 
4252    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4253    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4254    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4255    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4256    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4257    matrix, and [DF] as another SeqAIJ matrix.
4258 
4259    When the d_nz, o_nz parameters are specified, d_nz storage elements are
4260    allocated for every row of the local DIAGONAL submatrix, and o_nz
4261    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4262    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4263    local row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4264    In this case, the values of d_nz,o_nz are:
4265 .vb
4266      proc0 : d_nz = 2, o_nz = 2
4267      proc1 : d_nz = 3, o_nz = 2
4268      proc2 : d_nz = 1, o_nz = 4
4269 .ve
4270    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4271    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4272    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4273    34 values.
4274 
4275    When the d_nnz, o_nnz parameters are specified, the storage is specified
4276    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4277    In the above case the values for d_nnz,o_nnz are:
4278 .vb
4279      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4280      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4281      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4282 .ve
4283    Here the space allocated is the sum of all the above values, i.e., 34, and
4284    hence the preallocation is perfect.
4285 
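   A minimal sketch of the single call made on proc0 of the example above, using the per-row counts
   d_nnz and o_nnz listed for proc0 (a sketch only; comm and ierr are assumed to be declared elsewhere).
   Setting the values with MatSetValues() and assembling the matrix would follow:

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};

     ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
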
4286    Level: intermediate
4287 
4288 .keywords: matrix, aij, compressed row, sparse, parallel
4289 
4290 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4291           MPIAIJ, MatCreateMPIAIJWithArrays()
4292 @*/
4293 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4294 {
4295   PetscErrorCode ierr;
4296   PetscMPIInt    size;
4297 
4298   PetscFunctionBegin;
4299   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4300   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4301   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4302   if (size > 1) {
4303     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4304     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4305   } else {
4306     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4307     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4308   }
4309   PetscFunctionReturn(0);
4310 }
4311 
4312 #undef __FUNCT__
4313 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4314 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4315 {
4316   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4317 
4318   PetscFunctionBegin;
4319   *Ad     = a->A;
4320   *Ao     = a->B;
4321   *colmap = a->garray;
4322   PetscFunctionReturn(0);
4323 }
4324 
4325 #undef __FUNCT__
4326 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4327 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4328 {
4329   PetscErrorCode ierr;
4330   PetscInt       i;
4331   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4332 
4333   PetscFunctionBegin;
4334   if (coloring->ctype == IS_COLORING_GLOBAL) {
4335     ISColoringValue *allcolors,*colors;
4336     ISColoring      ocoloring;
4337 
4338     /* set coloring for diagonal portion */
4339     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4340 
4341     /* set coloring for off-diagonal portion */
4342     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4343     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4344     for (i=0; i<a->B->cmap->n; i++) {
4345       colors[i] = allcolors[a->garray[i]];
4346     }
4347     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4348     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4349     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4350     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4351   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4352     ISColoringValue *colors;
4353     PetscInt        *larray;
4354     ISColoring      ocoloring;
4355 
4356     /* set coloring for diagonal portion */
4357     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4358     for (i=0; i<a->A->cmap->n; i++) {
4359       larray[i] = i + A->cmap->rstart;
4360     }
4361     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4362     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4363     for (i=0; i<a->A->cmap->n; i++) {
4364       colors[i] = coloring->colors[larray[i]];
4365     }
4366     ierr = PetscFree(larray);CHKERRQ(ierr);
4367     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4368     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4369     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4370 
4371     /* set coloring for off-diagonal portion */
4372     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4373     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4374     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4375     for (i=0; i<a->B->cmap->n; i++) {
4376       colors[i] = coloring->colors[larray[i]];
4377     }
4378     ierr = PetscFree(larray);CHKERRQ(ierr);
4379     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4380     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4381     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4382   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4383   PetscFunctionReturn(0);
4384 }
4385 
4386 #undef __FUNCT__
4387 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4388 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4389 {
4390   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4391   PetscErrorCode ierr;
4392 
4393   PetscFunctionBegin;
4394   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4395   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4396   PetscFunctionReturn(0);
4397 }
4398 
4399 #undef __FUNCT__
4400 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4401 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4402 {
4403   PetscErrorCode ierr;
4404   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4405   PetscInt       *indx;
4406 
4407   PetscFunctionBegin;
4408   /* This routine will ONLY return MPIAIJ type matrix */
4409   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4410   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4411   if (n == PETSC_DECIDE) {
4412     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4413   }
4414   /* Check sum(n) = N */
4415   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4416   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4417 
4418   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4419   rstart -= m;
4420 
4421   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4422   for (i=0; i<m; i++) {
4423     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4424     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4425     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4426   }
4427 
4428   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4429   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4430   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4431   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4432   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4433   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4434   PetscFunctionReturn(0);
4435 }
4436 
4437 #undef __FUNCT__
4438 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4439 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4440 {
4441   PetscErrorCode ierr;
4442   PetscInt       m,N,i,rstart,nnz,Ii;
4443   PetscInt       *indx;
4444   PetscScalar    *values;
4445 
4446   PetscFunctionBegin;
4447   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4448   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4449   for (i=0; i<m; i++) {
4450     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4451     Ii   = i + rstart;
4452     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4453     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4454   }
4455   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4456   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4457   PetscFunctionReturn(0);
4458 }
4459 
4460 #undef __FUNCT__
4461 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4462 /*@
4463       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4464                  matrices from each processor
4465 
4466     Collective on MPI_Comm
4467 
4468    Input Parameters:
4469 +    comm - the communicator the parallel matrix will live on
4470 .    inmat - the input sequential matrices
4471 .    n - number of local columns (or PETSC_DECIDE)
4472 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4473 
4474    Output Parameter:
4475 .    outmat - the parallel matrix generated
4476 
4477     Level: advanced
4478 
4479    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4480 
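   A minimal calling sketch, assuming each process holds a SeqAIJ matrix Aseq with the same number of
   columns (Aseq, comm, and ierr are assumed to be declared elsewhere):

.vb
     Mat Ampi;
     ierr = MatCreateMPIAIJConcatenateSeqAIJ(comm,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);CHKERRQ(ierr);
.ve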
4481 @*/
4482 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4483 {
4484   PetscErrorCode ierr;
4485 
4486   PetscFunctionBegin;
4487   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4488   if (scall == MAT_INITIAL_MATRIX) {
4489     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4490   }
4491   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4492   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4493   PetscFunctionReturn(0);
4494 }
4495 
4496 #undef __FUNCT__
4497 #define __FUNCT__ "MatFileSplit"
4498 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4499 {
4500   PetscErrorCode    ierr;
4501   PetscMPIInt       rank;
4502   PetscInt          m,N,i,rstart,nnz;
4503   size_t            len;
4504   const PetscInt    *indx;
4505   PetscViewer       out;
4506   char              *name;
4507   Mat               B;
4508   const PetscScalar *values;
4509 
4510   PetscFunctionBegin;
4511   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4512   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4513   /* Should this be the type of the diagonal block of A? */
4514   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4515   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4516   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4517   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4518   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4519   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4520   for (i=0; i<m; i++) {
4521     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4522     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4523     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4524   }
4525   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4526   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4527 
4528   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4529   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4530   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4531   sprintf(name,"%s.%d",outfile,rank);
4532   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4533   ierr = PetscFree(name);CHKERRQ(ierr);
4534   ierr = MatView(B,out);CHKERRQ(ierr);
4535   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4536   ierr = MatDestroy(&B);CHKERRQ(ierr);
4537   PetscFunctionReturn(0);
4538 }
4539 
4540 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4541 #undef __FUNCT__
4542 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4543 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4544 {
4545   PetscErrorCode      ierr;
4546   Mat_Merge_SeqsToMPI *merge;
4547   PetscContainer      container;
4548 
4549   PetscFunctionBegin;
4550   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4551   if (container) {
4552     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4553     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4554     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4555     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4556     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4557     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4558     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4559     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4560     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4561     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4562     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4563     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4564     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4565     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4566     ierr = PetscFree(merge);CHKERRQ(ierr);
4567     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4568   }
4569   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4570   PetscFunctionReturn(0);
4571 }
4572 
4573 #include <../src/mat/utils/freespace.h>
4574 #include <petscbt.h>
4575 
4576 #undef __FUNCT__
4577 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4578 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4579 {
4580   PetscErrorCode      ierr;
4581   MPI_Comm            comm;
4582   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4583   PetscMPIInt         size,rank,taga,*len_s;
4584   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4585   PetscInt            proc,m;
4586   PetscInt            **buf_ri,**buf_rj;
4587   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4588   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4589   MPI_Request         *s_waits,*r_waits;
4590   MPI_Status          *status;
4591   MatScalar           *aa=a->a;
4592   MatScalar           **abuf_r,*ba_i;
4593   Mat_Merge_SeqsToMPI *merge;
4594   PetscContainer      container;
4595 
4596   PetscFunctionBegin;
4597   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4598   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4599 
4600   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4601   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4602 
4603   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4604   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4605 
4606   bi     = merge->bi;
4607   bj     = merge->bj;
4608   buf_ri = merge->buf_ri;
4609   buf_rj = merge->buf_rj;
4610 
4611   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4612   owners = merge->rowmap->range;
4613   len_s  = merge->len_s;
4614 
4615   /* send and recv matrix values */
4616   /*-----------------------------*/
4617   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4618   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4619 
4620   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4621   for (proc=0,k=0; proc<size; proc++) {
4622     if (!len_s[proc]) continue;
4623     i    = owners[proc];
4624     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4625     k++;
4626   }
4627 
4628   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4629   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4630   ierr = PetscFree(status);CHKERRQ(ierr);
4631 
4632   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4633   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4634 
4635   /* insert mat values of mpimat */
4636   /*----------------------------*/
4637   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4638   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4639 
4640   for (k=0; k<merge->nrecv; k++) {
4641     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4642     nrows       = *(buf_ri_k[k]);
4643     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4644     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4645   }
4646 
4647   /* set values of ba */
4648   m = merge->rowmap->n;
4649   for (i=0; i<m; i++) {
4650     arow = owners[rank] + i;
4651     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4652     bnzi = bi[i+1] - bi[i];
4653     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4654 
4655     /* add local non-zero vals of this proc's seqmat into ba */
4656     anzi   = ai[arow+1] - ai[arow];
4657     aj     = a->j + ai[arow];
4658     aa     = a->a + ai[arow];
4659     nextaj = 0;
4660     for (j=0; nextaj<anzi; j++) {
4661       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4662         ba_i[j] += aa[nextaj++];
4663       }
4664     }
4665 
4666     /* add received vals into ba */
4667     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4668       /* i-th row */
4669       if (i == *nextrow[k]) {
4670         anzi   = *(nextai[k]+1) - *nextai[k];
4671         aj     = buf_rj[k] + *(nextai[k]);
4672         aa     = abuf_r[k] + *(nextai[k]);
4673         nextaj = 0;
4674         for (j=0; nextaj<anzi; j++) {
4675           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4676             ba_i[j] += aa[nextaj++];
4677           }
4678         }
4679         nextrow[k]++; nextai[k]++;
4680       }
4681     }
4682     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4683   }
4684   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4685   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4686 
4687   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4688   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4689   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4690   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4691   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4692   PetscFunctionReturn(0);
4693 }
4694 
4695 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4696 
4697 #undef __FUNCT__
4698 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4699 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4700 {
4701   PetscErrorCode      ierr;
4702   Mat                 B_mpi;
4703   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4704   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4705   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4706   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4707   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4708   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4709   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4710   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4711   MPI_Status          *status;
4712   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4713   PetscBT             lnkbt;
4714   Mat_Merge_SeqsToMPI *merge;
4715   PetscContainer      container;
4716 
4717   PetscFunctionBegin;
4718   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4719 
4720   /* make sure it is a PETSc comm */
4721   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4722   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4723   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4724 
4725   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4726   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4727 
4728   /* determine row ownership */
4729   /*---------------------------------------------------------*/
4730   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4731   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4732   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4733   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4734   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4735   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4736   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4737 
4738   m      = merge->rowmap->n;
4739   owners = merge->rowmap->range;
4740 
4741   /* determine the number of messages to send, their lengths */
4742   /*---------------------------------------------------------*/
4743   len_s = merge->len_s;
4744 
4745   len          = 0; /* length of buf_si[] */
4746   merge->nsend = 0;
4747   for (proc=0; proc<size; proc++) {
4748     len_si[proc] = 0;
4749     if (proc == rank) {
4750       len_s[proc] = 0;
4751     } else {
4752       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4753       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4754     }
4755     if (len_s[proc]) {
4756       merge->nsend++;
4757       nrows = 0;
4758       for (i=owners[proc]; i<owners[proc+1]; i++) {
4759         if (ai[i+1] > ai[i]) nrows++;
4760       }
4761       len_si[proc] = 2*(nrows+1);
4762       len         += len_si[proc];
4763     }
4764   }
4765 
4766   /* determine the number and length of messages to receive for ij-structure */
4767   /*-------------------------------------------------------------------------*/
4768   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4769   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4770 
4771   /* post the Irecv of j-structure */
4772   /*-------------------------------*/
4773   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4774   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4775 
4776   /* post the Isend of j-structure */
4777   /*--------------------------------*/
4778   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4779 
4780   for (proc=0, k=0; proc<size; proc++) {
4781     if (!len_s[proc]) continue;
4782     i    = owners[proc];
4783     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4784     k++;
4785   }
4786 
4787   /* receives and sends of j-structure are complete */
4788   /*------------------------------------------------*/
4789   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4790   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4791 
4792   /* send and recv i-structure */
4793   /*---------------------------*/
4794   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4795   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4796 
4797   ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4798   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4799   for (proc=0,k=0; proc<size; proc++) {
4800     if (!len_s[proc]) continue;
4801     /* form outgoing message for i-structure:
4802          buf_si[0]:                 nrows to be sent
4803                [1:nrows]:           row index (global)
4804                [nrows+1:2*nrows+1]: i-structure index
4805     */
4806     /*-------------------------------------------*/
4807     nrows       = len_si[proc]/2 - 1;
4808     buf_si_i    = buf_si + nrows+1;
4809     buf_si[0]   = nrows;
4810     buf_si_i[0] = 0;
4811     nrows       = 0;
4812     for (i=owners[proc]; i<owners[proc+1]; i++) {
4813       anzi = ai[i+1] - ai[i];
4814       if (anzi) {
4815         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4816         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4817         nrows++;
4818       }
4819     }
4820     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4821     k++;
4822     buf_si += len_si[proc];
4823   }
4824 
4825   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4826   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4827 
4828   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4829   for (i=0; i<merge->nrecv; i++) {
4830     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4831   }
4832 
4833   ierr = PetscFree(len_si);CHKERRQ(ierr);
4834   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4835   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4836   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4837   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4838   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4839   ierr = PetscFree(status);CHKERRQ(ierr);
4840 
4841   /* compute a local seq matrix in each processor */
4842   /*----------------------------------------------*/
4843   /* allocate bi array and free space for accumulating nonzero column info */
4844   ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4845   bi[0] = 0;
4846 
4847   /* create and initialize a linked list */
4848   nlnk = N+1;
4849   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4850 
4851   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4852   len  = ai[owners[rank+1]] - ai[owners[rank]];
4853   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4854 
4855   current_space = free_space;
4856 
4857   /* determine symbolic info for each local row */
4858   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4859 
4860   for (k=0; k<merge->nrecv; k++) {
4861     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4862     nrows       = *buf_ri_k[k];
4863     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4864     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4865   }
4866 
4867   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4868   len  = 0;
4869   for (i=0; i<m; i++) {
4870     bnzi = 0;
4871     /* add local non-zero cols of this proc's seqmat into lnk */
4872     arow  = owners[rank] + i;
4873     anzi  = ai[arow+1] - ai[arow];
4874     aj    = a->j + ai[arow];
4875     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4876     bnzi += nlnk;
4877     /* add received col data into lnk */
4878     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4879       if (i == *nextrow[k]) { /* i-th row */
4880         anzi  = *(nextai[k]+1) - *nextai[k];
4881         aj    = buf_rj[k] + *nextai[k];
4882         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4883         bnzi += nlnk;
4884         nextrow[k]++; nextai[k]++;
4885       }
4886     }
4887     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4888 
4889     /* if free space is not available, make more free space */
4890     if (current_space->local_remaining<bnzi) {
4891       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4892       nspacedouble++;
4893     }
4894     /* copy data into free space, then initialize lnk */
4895     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4896     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4897 
4898     current_space->array           += bnzi;
4899     current_space->local_used      += bnzi;
4900     current_space->local_remaining -= bnzi;
4901 
4902     bi[i+1] = bi[i] + bnzi;
4903   }
4904 
4905   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4906 
4907   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4908   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4909   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4910 
4911   /* create symbolic parallel matrix B_mpi */
4912   /*---------------------------------------*/
4913   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4914   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4915   if (n==PETSC_DECIDE) {
4916     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4917   } else {
4918     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4919   }
4920   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4921   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4922   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4923   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4924   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4925 
4926   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4927   B_mpi->assembled    = PETSC_FALSE;
4928   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4929   merge->bi           = bi;
4930   merge->bj           = bj;
4931   merge->buf_ri       = buf_ri;
4932   merge->buf_rj       = buf_rj;
4933   merge->coi          = NULL;
4934   merge->coj          = NULL;
4935   merge->owners_co    = NULL;
4936 
4937   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4938 
4939   /* attach the supporting struct to B_mpi for reuse */
4940   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4941   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4942   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4943   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4944   *mpimat = B_mpi;
4945 
4946   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4947   PetscFunctionReturn(0);
4948 }
4949 
4950 #undef __FUNCT__
4951 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4952 /*@C
4953       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4954                  matrices from each processor
4955 
4956     Collective on MPI_Comm
4957 
4958    Input Parameters:
4959 +    comm - the communicator the parallel matrix will live on
4960 .    seqmat - the input sequential matrices
4961 .    m - number of local rows (or PETSC_DECIDE)
4962 .    n - number of local columns (or PETSC_DECIDE)
4963 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4964 
4965    Output Parameter:
4966 .    mpimat - the parallel matrix generated
4967 
4968     Level: advanced
4969 
4970    Notes:
4971      The dimensions of the sequential matrix in each processor MUST be the same.
4972      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4973      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
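
   A minimal calling sketch, assuming each process holds a SeqAIJ matrix Aseq of the same global
   dimensions (Aseq, comm, and ierr are assumed to be declared elsewhere). The second call reuses the
   symbolic data after only the values of Aseq have changed:

.vb
     Mat Ampi;
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);CHKERRQ(ierr);
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&Ampi);CHKERRQ(ierr);
.ve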
4974 @*/
4975 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4976 {
4977   PetscErrorCode ierr;
4978   PetscMPIInt    size;
4979 
4980   PetscFunctionBegin;
4981   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4982   if (size == 1) {
4983     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4984     if (scall == MAT_INITIAL_MATRIX) {
4985       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4986     } else {
4987       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4988     }
4989     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4990     PetscFunctionReturn(0);
4991   }
4992   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4993   if (scall == MAT_INITIAL_MATRIX) {
4994     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4995   }
4996   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4997   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4998   PetscFunctionReturn(0);
4999 }
5000 
5001 #undef __FUNCT__
5002 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5003 /*@
5004      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5005           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5006           with MatGetSize()
5007 
5008     Not Collective
5009 
5010    Input Parameters:
5011 +    A - the matrix
5012 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5013 
5014    Output Parameter:
5015 .    A_loc - the local sequential matrix generated
5016 
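   A minimal usage sketch (A and ierr are assumed to be declared elsewhere); the second call refreshes
   the values of A_loc after the values of A have changed:

.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
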
5017     Level: developer
5018 
5019 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5020 
5021 @*/
5022 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5023 {
5024   PetscErrorCode ierr;
5025   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5026   Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5027   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5028   MatScalar      *aa=a->a,*ba=b->a,*cam;
5029   PetscScalar    *ca;
5030   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5031   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5032   PetscBool      match;
5033 
5034   PetscFunctionBegin;
5035   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5036   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5037   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5038   if (scall == MAT_INITIAL_MATRIX) {
5039     ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5040     ci[0] = 0;
5041     for (i=0; i<am; i++) {
5042       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5043     }
5044     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5045     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5046     k    = 0;
5047     for (i=0; i<am; i++) {
5048       ncols_o = bi[i+1] - bi[i];
5049       ncols_d = ai[i+1] - ai[i];
5050       /* off-diagonal portion of A */
5051       for (jo=0; jo<ncols_o; jo++) {
5052         col = cmap[*bj];
5053         if (col >= cstart) break;
5054         cj[k]   = col; bj++;
5055         ca[k++] = *ba++;
5056       }
5057       /* diagonal portion of A */
5058       for (j=0; j<ncols_d; j++) {
5059         cj[k]   = cstart + *aj++;
5060         ca[k++] = *aa++;
5061       }
5062       /* off-diagonal portion of A */
5063       for (j=jo; j<ncols_o; j++) {
5064         cj[k]   = cmap[*bj++];
5065         ca[k++] = *ba++;
5066       }
5067     }
5068     /* put together the new matrix */
5069     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5070     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5071     /* Since these are PETSc arrays, change flags to free them as necessary. */
5072     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5073     mat->free_a  = PETSC_TRUE;
5074     mat->free_ij = PETSC_TRUE;
5075     mat->nonew   = 0;
5076   } else if (scall == MAT_REUSE_MATRIX) {
5077     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5078     ci = mat->i; cj = mat->j; cam = mat->a;
5079     for (i=0; i<am; i++) {
5080       /* off-diagonal portion of A */
5081       ncols_o = bi[i+1] - bi[i];
5082       for (jo=0; jo<ncols_o; jo++) {
5083         col = cmap[*bj];
5084         if (col >= cstart) break;
5085         *cam++ = *ba++; bj++;
5086       }
5087       /* diagonal portion of A */
5088       ncols_d = ai[i+1] - ai[i];
5089       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5090       /* off-diagonal portion of A */
5091       for (j=jo; j<ncols_o; j++) {
5092         *cam++ = *ba++; bj++;
5093       }
5094     }
5095   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5096   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5097   PetscFunctionReturn(0);
5098 }
5099 
5100 #undef __FUNCT__
5101 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5102 /*@C
5103      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5104 
5105     Not Collective
5106 
5107    Input Parameters:
5108 +    A - the matrix
5109 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5110 -    row, col - index sets of rows and columns to extract (or NULL)
5111 
5112    Output Parameter:
5113 .    A_loc - the local sequential matrix generated
5114 
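   A minimal usage sketch taking all local rows and all nonzero columns by passing NULL for row and col
   (A and ierr are assumed to be declared elsewhere):

.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
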
5115     Level: developer
5116 
5117 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5118 
5119 @*/
5120 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5121 {
5122   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5123   PetscErrorCode ierr;
5124   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5125   IS             isrowa,iscola;
5126   Mat            *aloc;
5127   PetscBool      match;
5128 
5129   PetscFunctionBegin;
5130   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5131   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5132   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5133   if (!row) {
5134     start = A->rmap->rstart; end = A->rmap->rend;
5135     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5136   } else {
5137     isrowa = *row;
5138   }
5139   if (!col) {
5140     start = A->cmap->rstart;
5141     cmap  = a->garray;
5142     nzA   = a->A->cmap->n;
5143     nzB   = a->B->cmap->n;
5144     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5145     ncols = 0;
5146     for (i=0; i<nzB; i++) {
5147       if (cmap[i] < start) idx[ncols++] = cmap[i];
5148       else break;
5149     }
5150     imark = i;
5151     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5152     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5153     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5154   } else {
5155     iscola = *col;
5156   }
5157   if (scall != MAT_INITIAL_MATRIX) {
5158     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5159     aloc[0] = *A_loc;
5160   }
5161   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5162   *A_loc = aloc[0];
5163   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5164   if (!row) {
5165     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5166   }
5167   if (!col) {
5168     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5169   }
5170   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5171   PetscFunctionReturn(0);
5172 }
5173 
5174 #undef __FUNCT__
5175 #define __FUNCT__ "MatGetBrowsOfAcols"
5176 /*@C
5177     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
5178 
5179     Collective on Mat
5180 
5181    Input Parameters:
5182 +    A,B - the matrices in mpiaij format
5183 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5184 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5185 
5186    Output Parameter:
5187 +    rowb, colb - index sets of rows and columns of B to extract
5188 -    B_seq - the sequential matrix generated
5189 
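   A minimal usage sketch (A, B, and ierr are assumed to be declared elsewhere); the index sets created
   by the first call are reused by the second call after only the values of B have changed:

.vb
     IS  rowb = NULL,colb = NULL;
     Mat B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
.ve
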
5190     Level: developer
5191 
5192 @*/
5193 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5194 {
5195   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5196   PetscErrorCode ierr;
5197   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5198   IS             isrowb,iscolb;
5199   Mat            *bseq=NULL;
5200 
5201   PetscFunctionBegin;
5202   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5203     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5204   }
5205   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5206 
5207   if (scall == MAT_INITIAL_MATRIX) {
5208     start = A->cmap->rstart;
5209     cmap  = a->garray;
5210     nzA   = a->A->cmap->n;
5211     nzB   = a->B->cmap->n;
5212     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5213     ncols = 0;
5214     for (i=0; i<nzB; i++) {  /* global columns of A (rows of B) below the local ownership range */
5215       if (cmap[i] < start) idx[ncols++] = cmap[i];
5216       else break;
5217     }
5218     imark = i;
5219     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* columns of A (rows of B) in the local ownership range */
5220     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* global columns of A (rows of B) above the local ownership range */
5221     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5222     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5223   } else {
5224     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5225     isrowb  = *rowb; iscolb = *colb;
5226     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5227     bseq[0] = *B_seq;
5228   }
5229   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5230   *B_seq = bseq[0];
5231   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5232   if (!rowb) {
5233     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5234   } else {
5235     *rowb = isrowb;
5236   }
5237   if (!colb) {
5238     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5239   } else {
5240     *colb = iscolb;
5241   }
5242   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5243   PetscFunctionReturn(0);
5244 }
5245 
5246 #undef __FUNCT__
5247 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5248 /*
5249     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5250     of the OFF-DIAGONAL portion of the local part of A
5251 
5252     Collective on Mat
5253 
5254    Input Parameters:
5255 +    A,B - the matrices in mpiaij format
5256 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5257 
5258    Output Parameters:
5259 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5260 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5261 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5262 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5263 
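   A sketch of the intended calling pattern (as used, for example, by the parallel matrix-matrix multiply routines;
   the variable names here are illustrative only):

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa      = NULL;
      Mat       B_oth;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
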
5264     Level: developer
5265 
5266 */
5267 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5268 {
5269   VecScatter_MPI_General *gen_to,*gen_from;
5270   PetscErrorCode         ierr;
5271   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5272   Mat_SeqAIJ             *b_oth;
5273   VecScatter             ctx =a->Mvctx;
5274   MPI_Comm               comm;
5275   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5276   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5277   PetscScalar            *rvalues,*svalues;
5278   MatScalar              *b_otha,*bufa,*bufA;
5279   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5280   MPI_Request            *rwaits = NULL,*swaits = NULL;
5281   MPI_Status             *sstatus,rstatus;
5282   PetscMPIInt            jj;
5283   PetscInt               *cols,sbs,rbs;
5284   PetscScalar            *vals;
5285 
5286   PetscFunctionBegin;
5287   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5288   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5289     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5290   }
5291   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5292   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5293 
5294   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5295   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5296   rvalues  = gen_from->values; /* reused below to hold the lengths of the rows being received */
5297   svalues  = gen_to->values;   /* reused below to hold the lengths of the rows being sent */
5298   nrecvs   = gen_from->n;
5299   nsends   = gen_to->n;
5300 
5301   ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5302   srow    = gen_to->indices;    /* local row index to be sent */
5303   sstarts = gen_to->starts;
5304   sprocs  = gen_to->procs;
5305   sstatus = gen_to->sstatus;
5306   sbs     = gen_to->bs;
5307   rstarts = gen_from->starts;
5308   rprocs  = gen_from->procs;
5309   rbs     = gen_from->bs;
5310 
5311   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5312   if (scall == MAT_INITIAL_MATRIX) {
5313     /* i-array */
5314     /*---------*/
5315     /*  post receives */
5316     for (i=0; i<nrecvs; i++) {
5317       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5318       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5319       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5320     }
5321 
5322     /* pack the outgoing message */
5323     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5324 
5325     sstartsj[0] = 0;
5326     rstartsj[0] = 0;
5327     len         = 0; /* total length of j or a array to be sent */
5328     k           = 0;
5329     for (i=0; i<nsends; i++) {
5330       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5331       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5332       for (j=0; j<nrows; j++) {
5333         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5334         for (l=0; l<sbs; l++) {
5335           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5336 
5337           rowlen[j*sbs+l] = ncols;
5338 
5339           len += ncols;
5340           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5341         }
5342         k++;
5343       }
5344       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5345 
5346       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5347     }
5348     /* recvs and sends of i-array are completed */
5349     i = nrecvs;
5350     while (i--) {
5351       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5352     }
5353     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5354 
5355     /* allocate buffers for sending j and a arrays */
5356     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5357     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5358 
5359     /* create i-array of B_oth */
5360     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5361 
5362     b_othi[0] = 0;
5363     len       = 0; /* total length of j or a array to be received */
5364     k         = 0;
5365     for (i=0; i<nrecvs; i++) {
5366       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5367       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5368       for (j=0; j<nrows; j++) {
5369         b_othi[k+1] = b_othi[k] + rowlen[j];
5370         len        += rowlen[j]; k++;
5371       }
5372       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5373     }
5374 
5375     /* allocate space for j and a arrays of B_oth */
5376     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5377     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5378 
5379     /* j-array */
5380     /*---------*/
5381     /*  post receives of j-array */
5382     for (i=0; i<nrecvs; i++) {
5383       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5384       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5385     }
5386 
5387     /* pack the outgoing message j-array */
5388     k = 0;
5389     for (i=0; i<nsends; i++) {
5390       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5391       bufJ  = bufj+sstartsj[i];
5392       for (j=0; j<nrows; j++) {
5393         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5394         for (ll=0; ll<sbs; ll++) {
5395           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5396           for (l=0; l<ncols; l++) {
5397             *bufJ++ = cols[l];
5398           }
5399           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5400         }
5401       }
5402       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5403     }
5404 
5405     /* recvs and sends of j-array are completed */
5406     i = nrecvs;
5407     while (i--) {
5408       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5409     }
5410     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5411   } else if (scall == MAT_REUSE_MATRIX) {
5412     sstartsj = *startsj_s;
5413     rstartsj = *startsj_r;
5414     bufa     = *bufa_ptr;
5415     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5416     b_otha   = b_oth->a;
5417   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Unsupported MatReuse value; use MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5418 
5419   /* a-array */
5420   /*---------*/
5421   /*  post receives of a-array */
5422   for (i=0; i<nrecvs; i++) {
5423     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5424     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5425   }
5426 
5427   /* pack the outgoing message a-array */
5428   k = 0;
5429   for (i=0; i<nsends; i++) {
5430     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5431     bufA  = bufa+sstartsj[i];
5432     for (j=0; j<nrows; j++) {
5433       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5434       for (ll=0; ll<sbs; ll++) {
5435         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5436         for (l=0; l<ncols; l++) {
5437           *bufA++ = vals[l];
5438         }
5439         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5440       }
5441     }
5442     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5443   }
5444   /* recvs and sends of a-array are completed */
5445   i = nrecvs;
5446   while (i--) {
5447     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5448   }
5449   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5450   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5451 
5452   if (scall == MAT_INITIAL_MATRIX) {
5453     /* put together the new matrix */
5454     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5455 
5456     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5457     /* Since these are PETSc arrays, change flags to free them as necessary. */
5458     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5459     b_oth->free_a  = PETSC_TRUE;
5460     b_oth->free_ij = PETSC_TRUE;
5461     b_oth->nonew   = 0;
5462 
5463     ierr = PetscFree(bufj);CHKERRQ(ierr);
5464     if (!startsj_s || !bufa_ptr) {
5465       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5466       ierr = PetscFree(bufa);CHKERRQ(ierr);
5467     } else {
5468       *startsj_s = sstartsj;
5469       *startsj_r = rstartsj;
5470       *bufa_ptr  = bufa;
5471     }
5472   }
5473   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5474   PetscFunctionReturn(0);
5475 }
5476 
5477 #undef __FUNCT__
5478 #define __FUNCT__ "MatGetCommunicationStructs"
5479 /*@C
5480   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5481 
5482   Not Collective
5483 
5484   Input Parameters:
5485 . A - The matrix in mpiaij format
5486 
5487   Output Parameters:
5488 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5489 . colmap - A map from global column index to local index into lvec
5490 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5491 
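  Example usage (a minimal sketch; the returned objects are internal to the matrix and must not be destroyed by the caller):
.vb
     Vec        lvec;
     VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
#else
     PetscInt   *colmap;
#endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
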
5492   Level: developer
5493 
5494 @*/
5495 #if defined(PETSC_USE_CTABLE)
5496 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5497 #else
5498 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5499 #endif
5500 {
5501   Mat_MPIAIJ *a;
5502 
5503   PetscFunctionBegin;
5504   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5505   PetscValidPointer(lvec, 2);
5506   PetscValidPointer(colmap, 3);
5507   PetscValidPointer(multScatter, 4);
5508   a = (Mat_MPIAIJ*) A->data;
5509   if (lvec) *lvec = a->lvec;
5510   if (colmap) *colmap = a->colmap;
5511   if (multScatter) *multScatter = a->Mvctx;
5512   PetscFunctionReturn(0);
5513 }
5514 
5515 EXTERN_C_BEGIN
5516 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5517 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5518 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5519 EXTERN_C_END
5520 
5521 #undef __FUNCT__
5522 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5523 /*
5524     Computes C = A*B as (B'*A')', using the identity (B'*A')' = A*B, since computing the MPIDense times MPIAIJ product directly is untenable
5525 
5526                n                       p                          p
5527         (              )       (              )         (                  )
5528       m (      A       )  *  n (       B      )   =   m (         C        )
5529         (              )       (              )         (                  )
5530 
5531 */
5532 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5533 {
5534   PetscErrorCode ierr;
5535   Mat            At,Bt,Ct;
5536 
5537   PetscFunctionBegin;
5538   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5539   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5540   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5541   ierr = MatDestroy(&At);CHKERRQ(ierr);
5542   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5543   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5544   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5545   PetscFunctionReturn(0);
5546 }
5547 
5548 #undef __FUNCT__
5549 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5550 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5551 {
5552   PetscErrorCode ierr;
5553   PetscInt       m=A->rmap->n,n=B->cmap->n;
5554   Mat            Cmat;
5555 
5556   PetscFunctionBegin;
5557   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5558   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5559   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5560   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5561   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5562   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5563   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5564   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5565 
5566   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5567 
5568   *C = Cmat;
5569   PetscFunctionReturn(0);
5570 }
5571 
5572 /* ----------------------------------------------------------------*/
5573 #undef __FUNCT__
5574 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5575 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5576 {
5577   PetscErrorCode ierr;
5578 
5579   PetscFunctionBegin;
5580   if (scall == MAT_INITIAL_MATRIX) {
5581     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5582   }
5583   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5584   PetscFunctionReturn(0);
5585 }
5586 
5587 EXTERN_C_BEGIN
5588 #if defined(PETSC_HAVE_MUMPS)
5589 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5590 #endif
5591 #if defined(PETSC_HAVE_PASTIX)
5592 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5593 #endif
5594 #if defined(PETSC_HAVE_SUPERLU_DIST)
5595 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5596 #endif
5597 #if defined(PETSC_HAVE_CLIQUE)
5598 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5599 #endif
5600 EXTERN_C_END
5601 
5602 /*MC
5603    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5604 
5605    Options Database Keys:
5606 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5607 
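   Example of creating such a matrix (a sketch; comm, the sizes, and the preallocation values are placeholders):
.vb
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);
.ve
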
5608   Level: beginner
5609 
5610 .seealso: MatCreateAIJ()
5611 M*/
5612 
5613 EXTERN_C_BEGIN
5614 #undef __FUNCT__
5615 #define __FUNCT__ "MatCreate_MPIAIJ"
5616 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5617 {
5618   Mat_MPIAIJ     *b;
5619   PetscErrorCode ierr;
5620   PetscMPIInt    size;
5621 
5622   PetscFunctionBegin;
5623   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5624 
5625   ierr          = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5626   B->data       = (void*)b;
5627   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5628   B->assembled  = PETSC_FALSE;
5629   B->insertmode = NOT_SET_VALUES;
5630   b->size       = size;
5631 
5632   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5633 
5634   /* build cache for off-processor entries formed during assembly */
5635   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5636 
5637   b->donotstash  = PETSC_FALSE;
5638   b->colmap      = 0;
5639   b->garray      = 0;
5640   b->roworiented = PETSC_TRUE;
5641 
5642   /* stuff used for matrix vector multiply */
5643   b->lvec  = NULL;
5644   b->Mvctx = NULL;
5645 
5646   /* stuff for MatGetRow() */
5647   b->rowindices   = 0;
5648   b->rowvalues    = 0;
5649   b->getrowactive = PETSC_FALSE;
5650 
5651   /* flexible pointer used in CUSP/CUSPARSE classes */
5652   b->spptr = NULL;
5653 
5654 #if defined(PETSC_HAVE_MUMPS)
5655   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5656                                            "MatGetFactor_aij_mumps",
5657                                            MatGetFactor_aij_mumps);CHKERRQ(ierr);
5658 #endif
5659 #if defined(PETSC_HAVE_PASTIX)
5660   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5661                                            "MatGetFactor_mpiaij_pastix",
5662                                            MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5663 #endif
5664 #if defined(PETSC_HAVE_SUPERLU_DIST)
5665   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5666                                            "MatGetFactor_mpiaij_superlu_dist",
5667                                            MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5668 #endif
5669 #if defined(PETSC_HAVE_CLIQUE)
5670   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C",
5671                                            "MatGetFactor_aij_clique",
5672                                            MatGetFactor_aij_clique);CHKERRQ(ierr);
5673 #endif
5674   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5675                                            "MatStoreValues_MPIAIJ",
5676                                            MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5677   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5678                                            "MatRetrieveValues_MPIAIJ",
5679                                            MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5680   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5681                                            "MatGetDiagonalBlock_MPIAIJ",
5682                                            MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5683   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5684                                            "MatIsTranspose_MPIAIJ",
5685                                            MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5686   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5687                                            "MatMPIAIJSetPreallocation_MPIAIJ",
5688                                            MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5689   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5690                                            "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5691                                            MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5692   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5693                                            "MatDiagonalScaleLocal_MPIAIJ",
5694                                            MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5695   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5696                                            "MatConvert_MPIAIJ_MPIAIJPERM",
5697                                            MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5698   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5699                                            "MatConvert_MPIAIJ_MPIAIJCRL",
5700                                            MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5701   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5702                                            "MatConvert_MPIAIJ_MPISBAIJ",
5703                                            MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5704   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5705                                            "MatMatMult_MPIDense_MPIAIJ",
5706                                            MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5707   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5708                                            "MatMatMultSymbolic_MPIDense_MPIAIJ",
5709                                            MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5710   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5711                                            "MatMatMultNumeric_MPIDense_MPIAIJ",
5712                                            MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5713   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5714   PetscFunctionReturn(0);
5715 }
5716 EXTERN_C_END
5717 
5718 #undef __FUNCT__
5719 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5720 /*@
5721      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5722          and "off-diagonal" part of the matrix in CSR format.
5723 
5724    Collective on MPI_Comm
5725 
5726    Input Parameters:
5727 +  comm - MPI communicator
5728 .  m - number of local rows (Cannot be PETSC_DECIDE)
5729 .  n - number of local columns; this should be the same as the local size used in creating the
5730        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5731        calculated if N is given). For square matrices n is almost always m.
5732 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5733 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5734 .   i - row indices for "diagonal" portion of matrix
5735 .   j - column indices of the "diagonal" portion (local, i.e. relative to the first column of the diagonal block)
5736 .   a - matrix values of the "diagonal" portion
5737 .   oi - row indices for "off-diagonal" portion of matrix
5738 .   oj - column indices of the "off-diagonal" portion (global column indices)
5739 -   oa - matrix values of the "off-diagonal" portion
5740 
5741    Output Parameter:
5742 .   mat - the matrix
5743 
5744    Level: advanced
5745 
5746    Notes:
5747        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5748        must free the arrays once the matrix has been destroyed and not before.
5749 
5750        The i and j indices are 0 based
5751 
5752        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5753 
5754        This sets local rows and cannot be used to set off-processor values.
5755 
5756        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5757        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5758        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5759        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5760        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5761        communication if it is known that only local entries will be set.
5762 
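       Example (a sketch for the first process of a two-process run with 2 local rows and 2 local columns; the numerical
       values are purely illustrative, and the arrays must remain valid until the matrix is destroyed):
.vb
     PetscInt    i[]  = {0,1,2},  j[]  = {0,1};
     PetscScalar a[]  = {1.0,2.0};
     PetscInt    oi[] = {0,1,1},  oj[] = {2};
     PetscScalar oa[] = {3.0};
     ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
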
5763 .keywords: matrix, aij, compressed row, sparse, parallel
5764 
5765 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5766           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5767 @*/
5768 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5769 {
5770   PetscErrorCode ierr;
5771   Mat_MPIAIJ     *maij;
5772 
5773   PetscFunctionBegin;
5774   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5775   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5776   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5777   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5778   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5779   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5780   maij = (Mat_MPIAIJ*) (*mat)->data;
5781 
5782   (*mat)->preallocated = PETSC_TRUE;
5783 
5784   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5785   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5786 
5787   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5788   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5789 
5790   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5791   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5792   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5793   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5794 
5795   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5796   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5797   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5798   PetscFunctionReturn(0);
5799 }
5800 
5801 /*
5802     Special version for direct calls from Fortran
5803 */
5804 #include <petsc-private/fortranimpl.h>
5805 
5806 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5807 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5808 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5809 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5810 #endif
5811 
5812 /* Redefine these macros so they can be used in a void function */
5813 #undef CHKERRQ
5814 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5815 #undef SETERRQ2
5816 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5817 #undef SETERRQ3
5818 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5819 #undef SETERRQ
5820 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5821 
5822 EXTERN_C_BEGIN
5823 #undef __FUNCT__
5824 #define __FUNCT__ "matsetvaluesmpiaij_"
5825 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5826 {
5827   Mat            mat  = *mmat;
5828   PetscInt       m    = *mm, n = *mn;
5829   InsertMode     addv = *maddv;
5830   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5831   PetscScalar    value;
5832   PetscErrorCode ierr;
5833 
5834   MatCheckPreallocated(mat,1);
5835   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5836 
5837 #if defined(PETSC_USE_DEBUG)
5838   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5839 #endif
5840   {
5841     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5842     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5843     PetscBool roworiented = aij->roworiented;
5844 
5845     /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros used below */
5846     Mat        A                 = aij->A;
5847     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5848     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5849     MatScalar  *aa               = a->a;
5850     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5851     Mat        B                 = aij->B;
5852     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5853     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5854     MatScalar  *ba               = b->a;
5855 
5856     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5857     PetscInt  nonew = a->nonew;
5858     MatScalar *ap1,*ap2;
5859 
5860     PetscFunctionBegin;
5861     for (i=0; i<m; i++) {
5862       if (im[i] < 0) continue;
5863 #if defined(PETSC_USE_DEBUG)
5864       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5865 #endif
5866       if (im[i] >= rstart && im[i] < rend) {
5867         row      = im[i] - rstart;
5868         lastcol1 = -1;
5869         rp1      = aj + ai[row];
5870         ap1      = aa + ai[row];
5871         rmax1    = aimax[row];
5872         nrow1    = ailen[row];
5873         low1     = 0;
5874         high1    = nrow1;
5875         lastcol2 = -1;
5876         rp2      = bj + bi[row];
5877         ap2      = ba + bi[row];
5878         rmax2    = bimax[row];
5879         nrow2    = bilen[row];
5880         low2     = 0;
5881         high2    = nrow2;
5882 
5883         for (j=0; j<n; j++) {
5884           if (roworiented) value = v[i*n+j];
5885           else value = v[i+j*m];
5886           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5887           if (in[j] >= cstart && in[j] < cend) {
5888             col = in[j] - cstart;
5889             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5890           } else if (in[j] < 0) continue;
5891 #if defined(PETSC_USE_DEBUG)
5892           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5893 #endif
5894           else {
5895             if (mat->was_assembled) {
5896               if (!aij->colmap) {
5897                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5898               }
5899 #if defined(PETSC_USE_CTABLE)
5900               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5901               col--;
5902 #else
5903               col = aij->colmap[in[j]] - 1;
5904 #endif
5905               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5906                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5907                 col  =  in[j];
5908                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5909                 B     = aij->B;
5910                 b     = (Mat_SeqAIJ*)B->data;
5911                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5912                 rp2   = bj + bi[row];
5913                 ap2   = ba + bi[row];
5914                 rmax2 = bimax[row];
5915                 nrow2 = bilen[row];
5916                 low2  = 0;
5917                 high2 = nrow2;
5918                 bm    = aij->B->rmap->n;
5919                 ba    = b->a;
5920               }
5921             } else col = in[j];
5922             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5923           }
5924         }
5925       } else if (!aij->donotstash) {
5926         if (roworiented) {
5927           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5928         } else {
5929           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5930         }
5931       }
5932     }
5933   }
5934   PetscFunctionReturnVoid();
5935 }
5936 EXTERN_C_END
5937 
5938