xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision fb8e56e08d4d0bfe9fc63603ca1f7fddd68abbdb)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
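
   Example:
   A minimal creation sketch following the recommendation above (a sketch only; error
   checking is omitted, and m,n,M,N,dnz,onz stand for your local/global sizes and
   preallocation counts):
.vb
     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,dnz,NULL);
     MatMPIAIJSetPreallocation(A,dnz,NULL,onz,NULL);
.ve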
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The AIJ type also
20    automatically switches over to use inodes when enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
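  /* first pass: count the local rows whose diagonal (A) and off-diagonal (B) parts contain no nonzero values */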
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
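  /* second pass: record the global indices of the kept rows (those with at least one nonzero) */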
81   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
138   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
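  /* accumulate per-column contributions from the diagonal (A) and off-diagonal (B) blocks;
     off-diagonal column indices map back to global columns through garray */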
139   if (type == NORM_2) {
140     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
141       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
142     }
143     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
144       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
145     }
146   } else if (type == NORM_1) {
147     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
148       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
149     }
150     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
151       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
152     }
153   } else if (type == NORM_INFINITY) {
154     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
155       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
156     }
157     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
158       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
159     }
160 
161   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
162   if (type == NORM_INFINITY) {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
164   } else {
165     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
166   }
167   ierr = PetscFree(work);CHKERRQ(ierr);
168   if (type == NORM_2) {
169     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
170   }
171   PetscFunctionReturn(0);
172 }
173 
174 #undef __FUNCT__
175 #define __FUNCT__ "MatDistribute_MPIAIJ"
176 /*
177     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
178     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
179 
180     Only for square matrices
181 
182     Used by a preconditioner, hence PETSC_EXTERN
183 */
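/*
   Typical call (a sketch): gmat is a SeqAIJ matrix significant only on process 0,
   and m is the desired number of local rows on each process:

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
*/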
184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
185 {
186   PetscMPIInt    rank,size;
187   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
188   PetscErrorCode ierr;
189   Mat            mat;
190   Mat_SeqAIJ     *gmata;
191   PetscMPIInt    tag;
192   MPI_Status     status;
193   PetscBool      aij;
194   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
195 
196   PetscFunctionBegin;
197   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
198   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
199   if (!rank) {
200     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
201     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
202   }
203   if (reuse == MAT_INITIAL_MATRIX) {
204     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
205     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
206     if (!rank) {
207       bses[0] = gmat->rmap->bs;
208       bses[1] = gmat->cmap->bs;
209     }
210     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
211     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
212     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
213     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
214     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
215     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
216 
217     rowners[0] = 0;
218     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
219     rstart = rowners[rank];
220     rend   = rowners[rank+1];
221     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
222     if (!rank) {
223       gmata = (Mat_SeqAIJ*) gmat->data;
224       /* send row lengths to all processors */
225       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
226       for (i=1; i<size; i++) {
227         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
228       }
229       /* count the off-diagonal entries in each row (olens) and those to the left of the diagonal block (ld) */
230       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
231       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
232       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
233       jj   = 0;
234       for (i=0; i<m; i++) {
235         for (j=0; j<dlens[i]; j++) {
236           if (gmata->j[jj] < rstart) ld[i]++;
237           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
238           jj++;
239         }
240       }
241       /* send column indices to other processes */
242       for (i=1; i<size; i++) {
243         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
244         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
245         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
246       }
247 
248       /* send numerical values to other processes */
249       for (i=1; i<size; i++) {
250         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
251         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
252       }
253       gmataa = gmata->a;
254       gmataj = gmata->j;
255 
256     } else {
257       /* receive row lengths */
258       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       /* receive column indices */
260       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
262       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
263       /* count the off-diagonal entries in each row (olens) and those to the left of the diagonal block (ld) */
264       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
265       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
266       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
267       jj   = 0;
268       for (i=0; i<m; i++) {
269         for (j=0; j<dlens[i]; j++) {
270           if (gmataj[jj] < rstart) ld[i]++;
271           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
272           jj++;
273         }
274       }
275       /* receive numerical values */
276       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
277       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
278     }
279     /* set preallocation */
280     for (i=0; i<m; i++) {
281       dlens[i] -= olens[i];
282     }
283     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
284     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
285 
286     for (i=0; i<m; i++) {
287       dlens[i] += olens[i];
288     }
289     cnt = 0;
290     for (i=0; i<m; i++) {
291       row  = rstart + i;
292       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
293       cnt += dlens[i];
294     }
295     if (rank) {
296       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
297     }
298     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
299     ierr = PetscFree(rowners);CHKERRQ(ierr);
300 
301     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
302 
303     *inmat = mat;
304   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
305     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
306     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
307     mat  = *inmat;
308     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
309     if (!rank) {
310       /* send numerical values to other processes */
311       gmata  = (Mat_SeqAIJ*) gmat->data;
312       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
313       gmataa = gmata->a;
314       for (i=1; i<size; i++) {
315         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
316         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
317       }
318       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
319     } else {
320       /* receive numerical values from process 0 */
321       nz   = Ad->nz + Ao->nz;
322       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
326     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
327     ad = Ad->a;
328     ao = Ao->a;
329     if (mat->rmap->n) {
330       i  = 0;
331       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     for (i=1; i<mat->rmap->n; i++) {
335       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
336       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
337     }
338     i--;
339     if (mat->rmap->n) {
340       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
341     }
342     if (rank) {
343       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
344     }
345   }
346   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
347   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
350 
351 /*
352   Local utility routine that creates a mapping from the global column
353 number to the local number in the off-diagonal part of the local
354 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
355 a slightly higher hash table cost; without it it is not scalable (each process
356 stores an order-N integer array) but access is fast.
357 */
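/*
   Lookup sketch: the way MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below use the
   colmap, a global column gcol is translated to a local off-diagonal index lcol via

     #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;
     #else
       lcol = aij->colmap[gcol] - 1;
     #endif

   entries are stored shifted by one so that a result of -1 means "column not present"
*/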
358 #undef __FUNCT__
359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
361 {
362   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
363   PetscErrorCode ierr;
364   PetscInt       n = aij->B->cmap->n,i;
365 
366   PetscFunctionBegin;
367   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
368 #if defined(PETSC_USE_CTABLE)
369   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
370   for (i=0; i<n; i++) {
371     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
372   }
373 #else
374   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
375   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
376   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
377   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
378 #endif
379   PetscFunctionReturn(0);
380 }
381 
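/*
   Inserts or adds value at (row,col) of the diagonal block A: a binary search narrows
   [low1,high1), a linear scan locates the slot, and a new nonzero shifts the later
   entries of the row up, reallocating via MatSeqXAIJReallocateAIJ() when the row is full.
*/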
382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
383 { \
384     if (col <= lastcol1)  low1 = 0;     \
385     else                 high1 = nrow1; \
386     lastcol1 = col;\
387     while (high1-low1 > 5) { \
388       t = (low1+high1)/2; \
389       if (rp1[t] > col) high1 = t; \
390       else              low1  = t; \
391     } \
392       for (_i=low1; _i<high1; _i++) { \
393         if (rp1[_i] > col) break; \
394         if (rp1[_i] == col) { \
395           if (addv == ADD_VALUES) ap1[_i] += value;   \
396           else                    ap1[_i] = value; \
397           goto a_noinsert; \
398         } \
399       }  \
400       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
401       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
402       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
403       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
404       N = nrow1++ - 1; a->nz++; high1++; \
405       /* shift up all the later entries in this row */ \
406       for (ii=N; ii>=_i; ii--) { \
407         rp1[ii+1] = rp1[ii]; \
408         ap1[ii+1] = ap1[ii]; \
409       } \
410       rp1[_i] = col;  \
411       ap1[_i] = value;  \
412       a_noinsert: ; \
413       ailen[row] = nrow1; \
414 }
415 
416 
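/* Same insertion logic as MatSetValues_SeqAIJ_A_Private(), applied to the off-diagonal block B */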
417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
418   { \
419     if (col <= lastcol2) low2 = 0;                        \
420     else high2 = nrow2;                                   \
421     lastcol2 = col;                                       \
422     while (high2-low2 > 5) {                              \
423       t = (low2+high2)/2;                                 \
424       if (rp2[t] > col) high2 = t;                        \
425       else             low2  = t;                         \
426     }                                                     \
427     for (_i=low2; _i<high2; _i++) {                       \
428       if (rp2[_i] > col) break;                           \
429       if (rp2[_i] == col) {                               \
430         if (addv == ADD_VALUES) ap2[_i] += value;         \
431         else                    ap2[_i] = value;          \
432         goto b_noinsert;                                  \
433       }                                                   \
434     }                                                     \
435     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
436     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
437     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
438     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
439     N = nrow2++ - 1; b->nz++; high2++;                    \
440     /* shift up all the later entries in this row */      \
441     for (ii=N; ii>=_i; ii--) {                            \
442       rp2[ii+1] = rp2[ii];                                \
443       ap2[ii+1] = ap2[ii];                                \
444     }                                                     \
445     rp2[_i] = col;                                        \
446     ap2[_i] = value;                                      \
447     b_noinsert: ;                                         \
448     bilen[row] = nrow2;                                   \
449   }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
454 {
455   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
456   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
457   PetscErrorCode ierr;
458   PetscInt       l,*garray = mat->garray,diag;
459 
460   PetscFunctionBegin;
461   /* this code only works for square matrices A; the row values v are assumed ordered by increasing global column */
462 
463   /* find size of row to the left of the diagonal part */
464   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
465   row  = row - diag;
466   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
467     if (garray[b->j[b->i[row]+l]] > diag) break;
468   }
469   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
470 
471   /* diagonal part */
472   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
473 
474   /* right of diagonal part */
475   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatSetValues_MPIAIJ"
481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   PetscScalar    value;
485   PetscErrorCode ierr;
486   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
487   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
488   PetscBool      roworiented = aij->roworiented;
489 
490   /* Some Variables required in the macro */
491   Mat        A                 = aij->A;
492   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
493   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
494   MatScalar  *aa               = a->a;
495   PetscBool  ignorezeroentries = a->ignorezeroentries;
496   Mat        B                 = aij->B;
497   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
498   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
499   MatScalar  *ba               = b->a;
500 
501   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
502   PetscInt  nonew;
503   MatScalar *ap1,*ap2;
504 
505   PetscFunctionBegin;
506   if (v) PetscValidScalarPointer(v,6);
507   for (i=0; i<m; i++) {
508     if (im[i] < 0) continue;
509 #if defined(PETSC_USE_DEBUG)
510     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
511 #endif
512     if (im[i] >= rstart && im[i] < rend) {
513       row      = im[i] - rstart;
514       lastcol1 = -1;
515       rp1      = aj + ai[row];
516       ap1      = aa + ai[row];
517       rmax1    = aimax[row];
518       nrow1    = ailen[row];
519       low1     = 0;
520       high1    = nrow1;
521       lastcol2 = -1;
522       rp2      = bj + bi[row];
523       ap2      = ba + bi[row];
524       rmax2    = bimax[row];
525       nrow2    = bilen[row];
526       low2     = 0;
527       high2    = nrow2;
528 
529       for (j=0; j<n; j++) {
530         if (v) {
531           if (roworiented) value = v[i*n+j];
532           else             value = v[i+j*m];
533         } else value = 0.0;
534         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
535         if (in[j] >= cstart && in[j] < cend) {
536           col   = in[j] - cstart;
537           nonew = a->nonew;
538           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
539         } else if (in[j] < 0) continue;
540 #if defined(PETSC_USE_DEBUG)
541         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
542 #endif
543         else {
544           if (mat->was_assembled) {
545             if (!aij->colmap) {
546               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
547             }
548 #if defined(PETSC_USE_CTABLE)
549             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
550             col--;
551 #else
552             col = aij->colmap[in[j]] - 1;
553 #endif
554             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
555               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
556               col  =  in[j];
557               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
558               B     = aij->B;
559               b     = (Mat_SeqAIJ*)B->data;
560               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
561               rp2   = bj + bi[row];
562               ap2   = ba + bi[row];
563               rmax2 = bimax[row];
564               nrow2 = bilen[row];
565               low2  = 0;
566               high2 = nrow2;
567               bm    = aij->B->rmap->n;
568               ba    = b->a;
569             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
570           } else col = in[j];
571           nonew = b->nonew;
572           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
573         }
574       }
575     } else {
576       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
577       if (!aij->donotstash) {
578         mat->assembled = PETSC_FALSE;
579         if (roworiented) {
580           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
581         } else {
582           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
583         }
584       }
585     }
586   }
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNCT__
591 #define __FUNCT__ "MatGetValues_MPIAIJ"
592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
593 {
594   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
595   PetscErrorCode ierr;
596   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
597   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
598 
599   PetscFunctionBegin;
600   for (i=0; i<m; i++) {
601     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
602     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
603     if (idxm[i] >= rstart && idxm[i] < rend) {
604       row = idxm[i] - rstart;
605       for (j=0; j<n; j++) {
606         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
607         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
608         if (idxn[j] >= cstart && idxn[j] < cend) {
609           col  = idxn[j] - cstart;
610           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
611         } else {
612           if (!aij->colmap) {
613             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
614           }
615 #if defined(PETSC_USE_CTABLE)
616           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
617           col--;
618 #else
619           col = aij->colmap[idxn[j]] - 1;
620 #endif
621           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
622           else {
623             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
624           }
625         }
626       }
627     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
628   }
629   PetscFunctionReturn(0);
630 }
631 
632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       nstash,reallocs;
641   InsertMode     addv;
642 
643   PetscFunctionBegin;
644   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
645 
646   /* make sure all processes are using the same insert mode (all INSERT_VALUES or all ADD_VALUES) */
647   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
648   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
649   mat->insertmode = addv; /* in case this processor had no cache */
650 
651   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
652   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
653   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNCT__
658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
660 {
661   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
662   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
663   PetscErrorCode ierr;
664   PetscMPIInt    n;
665   PetscInt       i,j,rstart,ncols,flg;
666   PetscInt       *row,*col;
667   PetscBool      other_disassembled;
668   PetscScalar    *val;
669   InsertMode     addv = mat->insertmode;
670 
671   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
672 
673   PetscFunctionBegin;
674   if (!aij->donotstash && !mat->nooffprocentries) {
675     while (1) {
676       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
677       if (!flg) break;
678 
679       for (i=0; i<n; ) {
680         /* Now identify the consecutive vals belonging to the same row */
681         for (j=i,rstart=row[j]; j<n; j++) {
682           if (row[j] != rstart) break;
683         }
684         if (j < n) ncols = j-i;
685         else       ncols = n-i;
686         /* Now assemble all these values with a single function call */
687         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
688 
689         i = j;
690       }
691     }
692     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
693   }
694   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
695   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
696 
697   /* determine if any process has disassembled; if so we must
698      also disassemble ourselves, in order that we may reassemble. */
699   /*
700      if the nonzero structure of submatrix B cannot change then we know that
701      no process disassembled, and thus we can skip this step
702   */
703   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
704     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
705     if (mat->was_assembled && !other_disassembled) {
706       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
707     }
708   }
709   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
710     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
711   }
712   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
713   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
714   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
715   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
716 
717   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
718 
719   aij->rowvalues = 0;
720 
721   /* used by MatAXPY() */
722   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
723   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
724 
725   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
726   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
732 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
733 {
734   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
735   PetscErrorCode ierr;
736 
737   PetscFunctionBegin;
738   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
739   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatZeroRows_MPIAIJ"
745 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
746 {
747   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
748   PetscErrorCode    ierr;
749   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
750   PetscInt          i,*owners = A->rmap->range;
751   PetscInt          *nprocs,j,idx,nsends,row;
752   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
753   PetscInt          *rvalues,count,base,slen,*source;
754   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
755   MPI_Comm          comm;
756   MPI_Request       *send_waits,*recv_waits;
757   MPI_Status        recv_status,*send_status;
758   const PetscScalar *xx;
759   PetscScalar       *bb;
760 #if defined(PETSC_DEBUG)
761   PetscBool found = PETSC_FALSE;
762 #endif
763 
764   PetscFunctionBegin;
765   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
766   /*  first count number of contributors to each processor */
767   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
768   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
769   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
770   j    = 0;
771   for (i=0; i<N; i++) {
772     if (lastidx > (idx = rows[i])) j = 0;
773     lastidx = idx;
774     for (; j<size; j++) {
775       if (idx >= owners[j] && idx < owners[j+1]) {
776         nprocs[2*j]++;
777         nprocs[2*j+1] = 1;
778         owner[i]      = j;
779 #if defined(PETSC_DEBUG)
780         found = PETSC_TRUE;
781 #endif
782         break;
783       }
784     }
785 #if defined(PETSC_DEBUG)
786     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
787     found = PETSC_FALSE;
788 #endif
789   }
790   nsends = 0;
791   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
792 
793   if (A->nooffproczerorows) {
794     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
795     nrecvs = nsends;
796     nmax   = N;
797   } else {
798     /* inform other processors of number of messages and max length*/
799     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
800   }
801 
802   /* post receives:   */
803   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
804   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
805   for (i=0; i<nrecvs; i++) {
806     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
807   }
808 
809   /* do sends:
810       1) starts[i] gives the starting index in svalues for stuff going to
811          the ith processor
812   */
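  /* e.g. with three processes and per-rank counts nprocs[2*i] = {2,0,3}, the
     starts computed below are {0,2,2} and svalues is packed as
     [2 rows for rank 0 | 3 rows for rank 2] */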
813   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
814   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
815   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
816 
817   starts[0] = 0;
818   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
819   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
820 
821   starts[0] = 0;
822   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
823   count = 0;
824   for (i=0; i<size; i++) {
825     if (nprocs[2*i+1]) {
826       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
827     }
828   }
829   ierr = PetscFree(starts);CHKERRQ(ierr);
830 
831   base = owners[rank];
832 
833   /*  wait on receives */
834   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
835   count = nrecvs; slen = 0;
836   while (count) {
837     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
838     /* unpack receives into our local space */
839     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
840 
841     source[imdex] = recv_status.MPI_SOURCE;
842     lens[imdex]   = n;
843     slen         += n;
844     count--;
845   }
846   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
847 
848   /* move the data into the send scatter */
849   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
850   count = 0;
851   for (i=0; i<nrecvs; i++) {
852     values = rvalues + i*nmax;
853     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
854   }
855   ierr = PetscFree(rvalues);CHKERRQ(ierr);
856   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
857   ierr = PetscFree(owner);CHKERRQ(ierr);
858   ierr = PetscFree(nprocs);CHKERRQ(ierr);
859 
860   /* fix right hand side if needed */
861   if (x && b) {
862     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
863     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
864     for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
865     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
866     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
867   }
868   /*
869         Zero the required rows. If the "diagonal block" of the matrix
870      is square and the user wishes to set the diagonal we use separate
871      code so that MatSetValues() is not called for each diagonal entry,
872      which would allocate new memory and trigger many mallocs, slowing
873      things down.
874   */
875   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
876   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
877   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
878     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
879   } else if (diag != 0.0) {
880     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
881     if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
882     for (i = 0; i < slen; i++) {
883       row  = lrows[i] + rstart;
884       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
885     }
886     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
887     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
888   } else {
889     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
890   }
891   ierr = PetscFree(lrows);CHKERRQ(ierr);
892 
893   /* wait on sends */
894   if (nsends) {
895     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
896     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
897     ierr = PetscFree(send_status);CHKERRQ(ierr);
898   }
899   ierr = PetscFree(send_waits);CHKERRQ(ierr);
900   ierr = PetscFree(svalues);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
904 #undef __FUNCT__
905 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
906 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
907 {
908   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
909   PetscErrorCode    ierr;
910   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
911   PetscInt          i,*owners = A->rmap->range;
912   PetscInt          *nprocs,j,idx,nsends;
913   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
914   PetscInt          *rvalues,count,base,slen,*source;
915   PetscInt          *lens,*lrows,*values,m;
916   MPI_Comm          comm;
917   MPI_Request       *send_waits,*recv_waits;
918   MPI_Status        recv_status,*send_status;
919   const PetscScalar *xx;
920   PetscScalar       *bb,*mask;
921   Vec               xmask,lmask;
922   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
923   const PetscInt    *aj, *ii,*ridx;
924   PetscScalar       *aa;
925 #if defined(PETSC_DEBUG)
926   PetscBool found = PETSC_FALSE;
927 #endif
928 
929   PetscFunctionBegin;
930   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
931   /*  first count number of contributors to each processor */
932   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
933   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
934   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
935   j    = 0;
936   for (i=0; i<N; i++) {
937     if (lastidx > (idx = rows[i])) j = 0;
938     lastidx = idx;
939     for (; j<size; j++) {
940       if (idx >= owners[j] && idx < owners[j+1]) {
941         nprocs[2*j]++;
942         nprocs[2*j+1] = 1;
943         owner[i]      = j;
944 #if defined(PETSC_DEBUG)
945         found = PETSC_TRUE;
946 #endif
947         break;
948       }
949     }
950 #if defined(PETSC_DEBUG)
951     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
952     found = PETSC_FALSE;
953 #endif
954   }
955   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
956 
957   /* inform other processors of number of messages and max length*/
958   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
959 
960   /* post receives:   */
961   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
962   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
963   for (i=0; i<nrecvs; i++) {
964     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
965   }
966 
967   /* do sends:
968       1) starts[i] gives the starting index in svalues for stuff going to
969          the ith processor
970   */
971   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
972   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
973   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
974 
975   starts[0] = 0;
976   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
977   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
978 
979   starts[0] = 0;
980   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
981   count = 0;
982   for (i=0; i<size; i++) {
983     if (nprocs[2*i+1]) {
984       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
985     }
986   }
987   ierr = PetscFree(starts);CHKERRQ(ierr);
988 
989   base = owners[rank];
990 
991   /*  wait on receives */
992   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
993   count = nrecvs; slen = 0;
994   while (count) {
995     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
996     /* unpack receives into our local space */
997     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
998 
999     source[imdex] = recv_status.MPI_SOURCE;
1000     lens[imdex]   = n;
1001     slen         += n;
1002     count--;
1003   }
1004   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1005 
1006   /* move the data into the send scatter */
1007   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1008   count = 0;
1009   for (i=0; i<nrecvs; i++) {
1010     values = rvalues + i*nmax;
1011     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
1012   }
1013   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1014   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1015   ierr = PetscFree(owner);CHKERRQ(ierr);
1016   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1017   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1018 
1019   /* zero diagonal part of matrix */
1020   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1021 
1022   /* handle off diagonal part of matrix */
1023   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1024   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1025   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1026   for (i=0; i<slen; i++) bb[lrows[i]] = 1;
1027   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1028   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1030   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1031   if (x) {
1032     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1035     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1036   }
1037   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1038 
1039   /* remove zeroed rows of off diagonal matrix */
1040   ii = aij->i;
1041   for (i=0; i<slen; i++) {
1042     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1043   }
1044 
1045   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1046   if (aij->compressedrow.use) {
1047     m    = aij->compressedrow.nrows;
1048     ii   = aij->compressedrow.i;
1049     ridx = aij->compressedrow.rindex;
1050     for (i=0; i<m; i++) {
1051       n  = ii[i+1] - ii[i];
1052       aj = aij->j + ii[i];
1053       aa = aij->a + ii[i];
1054 
1055       for (j=0; j<n; j++) {
1056         if (PetscAbsScalar(mask[*aj])) {
1057           if (b) bb[*ridx] -= *aa*xx[*aj];
1058           *aa = 0.0;
1059         }
1060         aa++;
1061         aj++;
1062       }
1063       ridx++;
1064     }
1065   } else { /* do not use compressed row format */
1066     m = l->B->rmap->n;
1067     for (i=0; i<m; i++) {
1068       n  = ii[i+1] - ii[i];
1069       aj = aij->j + ii[i];
1070       aa = aij->a + ii[i];
1071       for (j=0; j<n; j++) {
1072         if (PetscAbsScalar(mask[*aj])) {
1073           if (b) bb[i] -= *aa*xx[*aj];
1074           *aa = 0.0;
1075         }
1076         aa++;
1077         aj++;
1078       }
1079     }
1080   }
1081   if (x) {
1082     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1083     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1084   }
1085   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1086   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1087   ierr = PetscFree(lrows);CHKERRQ(ierr);
1088 
1089   /* wait on sends */
1090   if (nsends) {
1091     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1092     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1093     ierr = PetscFree(send_status);CHKERRQ(ierr);
1094   }
1095   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1096   ierr = PetscFree(svalues);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMult_MPIAIJ"
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1110   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
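  /* overlap communication of the needed off-process entries of xx with the local product a->A*xx */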
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151   PetscBool      merged;
1152 
1153   PetscFunctionBegin;
1154   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1155   /* do nondiagonal part */
1156   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1157   if (!merged) {
1158     /* send it on its way */
1159     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160     /* do local part */
1161     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1162     /* receive remote parts: note this assumes the values are not actually */
1163     /* added into yy until the VecScatterEnd() below */
1164     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165   } else {
1166     /* do local part */
1167     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1168     /* send it on its way */
1169     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1170     /* values actually were received in the Begin() but we need to call this nop */
1171     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1178 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1179 {
1180   MPI_Comm       comm;
1181   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1182   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1183   IS             Me,Notme;
1184   PetscErrorCode ierr;
1185   PetscInt       M,N,first,last,*notme,i;
1186   PetscMPIInt    size;
1187 
1188   PetscFunctionBegin;
1189   /* Easy test: symmetric diagonal block */
1190   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1191   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1192   if (!*f) PetscFunctionReturn(0);
1193   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1194   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1195   if (size == 1) PetscFunctionReturn(0);
1196 
1197   /* Hard test: off-diagonal block. This requires a call to MatGetSubMatrices(). */
1198   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1199   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1200   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1201   for (i=0; i<first; i++) notme[i] = i;
1202   for (i=last; i<M; i++) notme[i-last+first] = i;
1203   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1204   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1205   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1206   Aoff = Aoffs[0];
1207   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1208   Boff = Boffs[0];
1209   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1210   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1211   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1212   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1213   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1214   ierr = PetscFree(notme);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 #undef __FUNCT__
1219 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1220 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1221 {
1222   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1223   PetscErrorCode ierr;
1224 
1225   PetscFunctionBegin;
1226   /* do nondiagonal part */
1227   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1228   /* send it on its way */
1229   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1230   /* do local part */
1231   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1232   /* receive remote parts */
1233   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 /*
1238   This only works correctly for square matrices where the subblock A->A is the
1239    diagonal block
1240 */
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1243 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1244 {
1245   PetscErrorCode ierr;
1246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1247 
1248   PetscFunctionBegin;
1249   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1250   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1251   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatScale_MPIAIJ"
1257 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1258 {
1259   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1260   PetscErrorCode ierr;
1261 
1262   PetscFunctionBegin;
1263   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1264   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatDestroy_MPIAIJ"
1270 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1271 {
1272   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1273   PetscErrorCode ierr;
1274 
1275   PetscFunctionBegin;
1276 #if defined(PETSC_USE_LOG)
1277   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1278 #endif
1279   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1280   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1281   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1282   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1283 #if defined(PETSC_USE_CTABLE)
1284   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1285 #else
1286   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1287 #endif
1288   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1289   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1290   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1291   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1292   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1293   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1294 
1295   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1309 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1310 {
1311   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1312   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1313   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1314   PetscErrorCode ierr;
1315   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1316   int            fd;
1317   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1318   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1319   PetscScalar    *column_values;
1320   PetscInt       message_count,flowcontrolcount;
1321   FILE           *file;
1322 
1323   PetscFunctionBegin;
1324   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1325   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1326   nz   = A->nz + B->nz;
1327   if (!rank) {
1328     header[0] = MAT_FILE_CLASSID;
1329     header[1] = mat->rmap->N;
1330     header[2] = mat->cmap->N;
1331 
1332     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1333     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1334     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     /* get largest number of rows any processor has */
1336     rlen  = mat->rmap->n;
1337     range = mat->rmap->range;
1338     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1339   } else {
1340     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1341     rlen = mat->rmap->n;
1342   }
1343 
1344   /* load up the local row counts */
1345   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1346   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1347 
1348   /* store the row lengths to the file */
1349   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1350   if (!rank) {
1351     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1352     for (i=1; i<size; i++) {
1353       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1354       rlen = range[i+1] - range[i];
1355       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1356       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1357     }
1358     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1359   } else {
1360     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1361     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1362     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1363   }
1364   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1365 
1366   /* load up the local column indices */
1367   nzmax = nz; /* the 0th process needs as much space as the largest nz on any process */
1368   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1369   ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1370   cnt   = 0;
1371   for (i=0; i<mat->rmap->n; i++) {
1372     for (j=B->i[i]; j<B->i[i+1]; j++) {
1373       if ((col = garray[B->j[j]]) > cstart) break;
1374       column_indices[cnt++] = col;
1375     }
1376     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1377     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1378   }
1379   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1380 
1381   /* store the column indices to the file */
1382   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1383   if (!rank) {
1384     MPI_Status status;
1385     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1386     for (i=1; i<size; i++) {
1387       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1388       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1389       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1390       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1391       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1392     }
1393     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1394   } else {
1395     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1396     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1397     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1398     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1399   }
1400   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1401 
1402   /* load up the local column values */
1403   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1404   cnt  = 0;
1405   for (i=0; i<mat->rmap->n; i++) {
1406     for (j=B->i[i]; j<B->i[i+1]; j++) {
1407       if (garray[B->j[j]] > cstart) break;
1408       column_values[cnt++] = B->a[j];
1409     }
1410     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1411     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1412   }
1413   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1414 
1415   /* store the column values to the file */
1416   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1417   if (!rank) {
1418     MPI_Status status;
1419     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1420     for (i=1; i<size; i++) {
1421       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1422       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1423       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1424       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1425       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1426     }
1427     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1428   } else {
1429     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1430     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1431     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1432     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1433   }
1434   ierr = PetscFree(column_values);CHKERRQ(ierr);
1435 
1436   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1437   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1438   PetscFunctionReturn(0);
1439 }
1440 
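/*
   Usage sketch (illustrative only): rank 0 above writes the header, then
   gathers each process's row lengths, column indices, and values in turn under
   flow control and appends them, producing one file that MatLoad() can read.

     PetscViewer viewer;
     Mat         B;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);       // dispatches here for parallel AIJ
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(B,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/
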
1441 #include <petscdraw.h>
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1444 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1445 {
1446   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1447   PetscErrorCode    ierr;
1448   PetscMPIInt       rank = aij->rank,size = aij->size;
1449   PetscBool         isdraw,iascii,isbinary;
1450   PetscViewer       sviewer;
1451   PetscViewerFormat format;
1452 
1453   PetscFunctionBegin;
1454   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1455   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1456   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1457   if (iascii) {
1458     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1459     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1460       MatInfo   info;
1461       PetscInt  *inodes;
1462 
1463       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1464       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1465       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr); /* returned pointer only tested against NULL below */
1466       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1467       if (!inodes) {
1468         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1469                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1470       } else {
1471         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1472                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1473       }
1474       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1475       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1476       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1477       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1478       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1479       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1480       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1481       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1482       PetscFunctionReturn(0);
1483     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1484       PetscInt inodecount,inodelimit,*inodes;
1485       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1486       if (inodes) {
1487         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1488       } else {
1489         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1490       }
1491       PetscFunctionReturn(0);
1492     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1493       PetscFunctionReturn(0);
1494     }
1495   } else if (isbinary) {
1496     if (size == 1) {
1497       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1498       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1499     } else {
1500       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1501     }
1502     PetscFunctionReturn(0);
1503   } else if (isdraw) {
1504     PetscDraw draw;
1505     PetscBool isnull;
1506     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1507     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1508   }
1509 
1510   if (size == 1) {
1511     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1512     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1513   } else {
1514     /* assemble the entire matrix onto first processor. */
1515     Mat        A;
1516     Mat_SeqAIJ *Aloc;
1517     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1518     MatScalar  *a;
1519 
1520     if (mat->rmap->N > 1024) {
1521       PetscBool flg = PETSC_FALSE;
1522 
1523       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1524       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1525     }
1526 
1527     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1528     if (!rank) {
1529       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1530     } else {
1531       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1532     }
1533     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1534     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1535     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1536     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1537     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1538 
1539     /* copy over the A part */
1540     Aloc = (Mat_SeqAIJ*)aij->A->data;
1541     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1542     row  = mat->rmap->rstart;
1543     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1544     for (i=0; i<m; i++) {
1545       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1546       row++;
1547       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1548     }
1549     aj = Aloc->j;
1550     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1551 
1552     /* copy over the B part */
1553     Aloc = (Mat_SeqAIJ*)aij->B->data;
1554     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1555     row  = mat->rmap->rstart;
1556     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1557     ct   = cols;
1558     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1559     for (i=0; i<m; i++) {
1560       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1561       row++;
1562       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1563     }
1564     ierr = PetscFree(ct);CHKERRQ(ierr);
1565     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1566     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1567     /*
1568        Everyone has to participate in drawing the matrix since the graphics waits
1569        are synchronized across all processes that share the PetscDraw object
1570     */
1571     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1572     if (!rank) {
1573       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1574       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1575       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1576       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1577     }
1578     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1579     ierr = MatDestroy(&A);CHKERRQ(ierr);
1580   }
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatView_MPIAIJ"
1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1587 {
1588   PetscErrorCode ierr;
1589   PetscBool      iascii,isdraw,issocket,isbinary;
1590 
1591   PetscFunctionBegin;
1592   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1593   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1594   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1595   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1596   if (iascii || isdraw || isbinary || issocket) {
1597     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1598   }
1599   PetscFunctionReturn(0);
1600 }
1601 
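/*
   Usage sketch (illustrative only): the viewer format selects which branch of
   MatView_MPIAIJ_ASCIIorDraworSocket() runs; for example the INFO formats print
   per-process nonzero statistics instead of the entries themselves.

     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/
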
1602 #undef __FUNCT__
1603 #define __FUNCT__ "MatSOR_MPIAIJ"
1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1605 {
1606   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1607   PetscErrorCode ierr;
1608   Vec            bb1 = 0;
1609   PetscBool      hasop;
1610 
1611   PetscFunctionBegin;
1612   if (flag == SOR_APPLY_UPPER) {
1613     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1614     PetscFunctionReturn(0);
1615   }
1616 
1617   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1618     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1619   }
1620 
1621   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1622     if (flag & SOR_ZERO_INITIAL_GUESS) {
1623       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1624       its--;
1625     }
1626 
1627     while (its--) {
1628       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1630 
1631       /* update rhs: bb1 = bb - B*x */
1632       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1633       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1634 
1635       /* local sweep */
1636       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1637     }
1638   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1639     if (flag & SOR_ZERO_INITIAL_GUESS) {
1640       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1641       its--;
1642     }
1643     while (its--) {
1644       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1646 
1647       /* update rhs: bb1 = bb - B*x */
1648       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1649       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1650 
1651       /* local sweep */
1652       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1653     }
1654   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1655     if (flag & SOR_ZERO_INITIAL_GUESS) {
1656       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1657       its--;
1658     }
1659     while (its--) {
1660       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1662 
1663       /* update rhs: bb1 = bb - B*x */
1664       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1665       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1666 
1667       /* local sweep */
1668       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1669     }
1670   } else if (flag & SOR_EISENSTAT) {
1671     Vec xx1;
1672 
1673     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1674     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1675 
1676     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1677     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1678     if (!mat->diag) {
1679       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1680       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1681     }
1682     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1683     if (hasop) {
1684       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1685     } else {
1686       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1687     }
1688     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1689 
1690     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1691 
1692     /* local sweep */
1693     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1694     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1695     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1696   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1697 
1698   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1699   PetscFunctionReturn(0);
1700 }
1701 
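/*
   Usage sketch (illustrative only): the SOR_LOCAL_* variants above implement
   processor-block SOR; each outer iteration scatters x to refresh the ghost
   values, forms bb1 = bb - B*x with the off-diagonal block, and then sweeps
   only the local diagonal block.  A direct call might look like

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,2,1,x);CHKERRQ(ierr);

   which performs two block-symmetric outer iterations with omega = 1, one
   local sweep each, starting from a zero initial guess.
*/
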
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatPermute_MPIAIJ"
1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1705 {
1706   Mat            aA,aB,Aperm;
1707   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1708   PetscScalar    *aa,*ba;
1709   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1710   PetscSF        rowsf,sf;
1711   IS             parcolp = NULL;
1712   PetscBool      done;
1713   PetscErrorCode ierr;
1714 
1715   PetscFunctionBegin;
1716   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1717   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1718   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1719   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1720 
1721   /* Invert row permutation to find out where my rows should go */
1722   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1723   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1724   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1725   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1726   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1727   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1728 
1729   /* Invert column permutation to find out where my columns should go */
1730   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1731   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1732   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1733   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1734   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1735   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1736   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1737 
1738   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1739   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1740   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1741 
1742   /* Find out where my gcols should go */
1743   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1744   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1745   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1746   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1747   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1748   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1749   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1750   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1751 
1752   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1753   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1754   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1755   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1756   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1757   for (i=0; i<m; i++) {
1758     PetscInt row = rdest[i],rowner;
1759     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1760     for (j=ai[i]; j<ai[i+1]; j++) {
1761       PetscInt cowner,col = cdest[aj[j]];
1762       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1763       if (rowner == cowner) dnnz[i]++;
1764       else onnz[i]++;
1765     }
1766     for (j=bi[i]; j<bi[i+1]; j++) {
1767       PetscInt cowner,col = gcdest[bj[j]];
1768       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1769       if (rowner == cowner) dnnz[i]++;
1770       else onnz[i]++;
1771     }
1772   }
1773   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1774   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1775   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1776   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1777   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1778   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1779   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1780 
1781   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1782   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1783   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1784   for (i=0; i<m; i++) {
1785     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1786     PetscInt rowlen;
1787     rowlen = ai[i+1] - ai[i];
1788     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1789     ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1790     rowlen = bi[i+1] - bi[i];
1791     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1792     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1793   }
1794   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1795   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1796   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1797   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1798   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1799   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1800   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1801   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1802   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1803   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1804   *B = Aperm;
1805   PetscFunctionReturn(0);
1806 }
1807 
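/*
   Usage sketch (illustrative only; the index arrays rowidx and colidx are
   hypothetical): rowp and colp list, for each locally owned row and column,
   its global position in the permuted matrix.

     IS  rowp,colp;
     Mat B;
     ierr = ISCreateGeneral(PETSC_COMM_WORLD,m,rowidx,PETSC_COPY_VALUES,&rowp);CHKERRQ(ierr);
     ierr = ISCreateGeneral(PETSC_COMM_WORLD,n,colidx,PETSC_COPY_VALUES,&colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
     ierr = ISDestroy(&rowp);CHKERRQ(ierr);
     ierr = ISDestroy(&colp);CHKERRQ(ierr);
*/
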
1808 #undef __FUNCT__
1809 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1811 {
1812   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1813   Mat            A    = mat->A,B = mat->B;
1814   PetscErrorCode ierr;
1815   PetscReal      isend[5],irecv[5];
1816 
1817   PetscFunctionBegin;
1818   info->block_size = 1.0;
1819   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1820 
1821   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1822   isend[3] = info->memory;  isend[4] = info->mallocs;
1823 
1824   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1825 
1826   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1827   isend[3] += info->memory;  isend[4] += info->mallocs;
1828   if (flag == MAT_LOCAL) {
1829     info->nz_used      = isend[0];
1830     info->nz_allocated = isend[1];
1831     info->nz_unneeded  = isend[2];
1832     info->memory       = isend[3];
1833     info->mallocs      = isend[4];
1834   } else if (flag == MAT_GLOBAL_MAX) {
1835     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1836 
1837     info->nz_used      = irecv[0];
1838     info->nz_allocated = irecv[1];
1839     info->nz_unneeded  = irecv[2];
1840     info->memory       = irecv[3];
1841     info->mallocs      = irecv[4];
1842   } else if (flag == MAT_GLOBAL_SUM) {
1843     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1844 
1845     info->nz_used      = irecv[0];
1846     info->nz_allocated = irecv[1];
1847     info->nz_unneeded  = irecv[2];
1848     info->memory       = irecv[3];
1849     info->mallocs      = irecv[4];
1850   }
1851   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1852   info->fill_ratio_needed = 0;
1853   info->factor_mallocs    = 0;
1854   PetscFunctionReturn(0);
1855 }
1856 
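/*
   Usage sketch (illustrative only): MAT_LOCAL reports this process's A and B
   parts combined; MAT_GLOBAL_SUM and MAT_GLOBAL_MAX reduce the five quantities
   packed into isend[] above across the communicator.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g  allocated %g  mallocs %g\n",
                        info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/
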
1857 #undef __FUNCT__
1858 #define __FUNCT__ "MatSetOption_MPIAIJ"
1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1860 {
1861   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1862   PetscErrorCode ierr;
1863 
1864   PetscFunctionBegin;
1865   switch (op) {
1866   case MAT_NEW_NONZERO_LOCATIONS:
1867   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1868   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1869   case MAT_KEEP_NONZERO_PATTERN:
1870   case MAT_NEW_NONZERO_LOCATION_ERR:
1871   case MAT_USE_INODES:
1872   case MAT_IGNORE_ZERO_ENTRIES:
1873     MatCheckPreallocated(A,1);
1874     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1875     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1876     break;
1877   case MAT_ROW_ORIENTED:
1878     a->roworiented = flg;
1879 
1880     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1881     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1882     break;
1883   case MAT_NEW_DIAGONALS:
1884     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1885     break;
1886   case MAT_IGNORE_OFF_PROC_ENTRIES:
1887     a->donotstash = flg;
1888     break;
1889   case MAT_SPD:
1890     A->spd_set = PETSC_TRUE;
1891     A->spd     = flg;
1892     if (flg) {
1893       A->symmetric                  = PETSC_TRUE;
1894       A->structurally_symmetric     = PETSC_TRUE;
1895       A->symmetric_set              = PETSC_TRUE;
1896       A->structurally_symmetric_set = PETSC_TRUE;
1897     }
1898     break;
1899   case MAT_SYMMETRIC:
1900     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1901     break;
1902   case MAT_STRUCTURALLY_SYMMETRIC:
1903     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1904     break;
1905   case MAT_HERMITIAN:
1906     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1907     break;
1908   case MAT_SYMMETRY_ETERNAL:
1909     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1910     break;
1911   default:
1912     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1913   }
1914   PetscFunctionReturn(0);
1915 }
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatGetRow_MPIAIJ"
1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1920 {
1921   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1922   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1923   PetscErrorCode ierr;
1924   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1925   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1926   PetscInt       *cmap,*idx_p;
1927 
1928   PetscFunctionBegin;
1929   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1930   mat->getrowactive = PETSC_TRUE;
1931 
1932   if (!mat->rowvalues && (idx || v)) {
1933     /*
1934         allocate enough space to hold information from the longest row.
1935     */
1936     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1937     PetscInt   max = 1,tmp;
1938     for (i=0; i<matin->rmap->n; i++) {
1939       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1940       if (max < tmp) max = tmp;
1941     }
1942     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1943   }
1944 
1945   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1946   lrow = row - rstart;
1947 
1948   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1949   if (!v)   {pvA = 0; pvB = 0;}
1950   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1951   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1952   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1953   nztot = nzA + nzB;
1954 
1955   cmap = mat->garray;
1956   if (v  || idx) {
1957     if (nztot) {
1958       /* Sort by increasing column numbers, assuming A and B already sorted */
1959       PetscInt imark = -1;
1960       if (v) {
1961         *v = v_p = mat->rowvalues;
1962         for (i=0; i<nzB; i++) {
1963           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1964           else break;
1965         }
1966         imark = i;
1967         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1968         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1969       }
1970       if (idx) {
1971         *idx = idx_p = mat->rowindices;
1972         if (imark > -1) {
1973           for (i=0; i<imark; i++) {
1974             idx_p[i] = cmap[cworkB[i]];
1975           }
1976         } else {
1977           for (i=0; i<nzB; i++) {
1978             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1979             else break;
1980           }
1981           imark = i;
1982         }
1983         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1984         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1985       }
1986     } else {
1987       if (idx) *idx = 0;
1988       if (v)   *v   = 0;
1989     }
1990   }
1991   *nz  = nztot;
1992   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1993   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2000 {
2001   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2002 
2003   PetscFunctionBegin;
2004   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2005   aij->getrowactive = PETSC_FALSE;
2006   PetscFunctionReturn(0);
2007 }
2008 
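/*
   Usage sketch (illustrative only): MatGetRow() above merges the diagonal (A)
   and off-diagonal (B) parts of a locally owned row into one list sorted by
   global column index; every call must be paired with MatRestoreRow().

     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       // ... inspect cols[0..ncols-1] and vals[0..ncols-1] ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/
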
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatNorm_MPIAIJ"
2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2012 {
2013   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
2014   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2015   PetscErrorCode ierr;
2016   PetscInt       i,j,cstart = mat->cmap->rstart;
2017   PetscReal      sum = 0.0;
2018   MatScalar      *v;
2019 
2020   PetscFunctionBegin;
2021   if (aij->size == 1) {
2022     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2023   } else {
2024     if (type == NORM_FROBENIUS) {
2025       v = amat->a;
2026       for (i=0; i<amat->nz; i++) {
2027         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2028       }
2029       v = bmat->a;
2030       for (i=0; i<bmat->nz; i++) {
2031         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2032       }
2033       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2034       *norm = PetscSqrtReal(*norm);
2035     } else if (type == NORM_1) { /* max column norm */
2036       PetscReal *tmp,*tmp2;
2037       PetscInt  *jj,*garray = aij->garray;
2038       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2039       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2040       ierr  = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2041       *norm = 0.0;
2042       v     = amat->a; jj = amat->j;
2043       for (j=0; j<amat->nz; j++) {
2044         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2045       }
2046       v = bmat->a; jj = bmat->j;
2047       for (j=0; j<bmat->nz; j++) {
2048         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2049       }
2050       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2051       for (j=0; j<mat->cmap->N; j++) {
2052         if (tmp2[j] > *norm) *norm = tmp2[j];
2053       }
2054       ierr = PetscFree(tmp);CHKERRQ(ierr);
2055       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2056     } else if (type == NORM_INFINITY) { /* max row norm */
2057       PetscReal ntemp = 0.0;
2058       for (j=0; j<aij->A->rmap->n; j++) {
2059         v   = amat->a + amat->i[j];
2060         sum = 0.0;
2061         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2062           sum += PetscAbsScalar(*v); v++;
2063         }
2064         v = bmat->a + bmat->i[j];
2065         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2066           sum += PetscAbsScalar(*v); v++;
2067         }
2068         if (sum > ntemp) ntemp = sum;
2069       }
2070       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2071     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2072   }
2073   PetscFunctionReturn(0);
2074 }
2075 
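/*
   Usage sketch (illustrative only): each supported norm reduces local
   contributions across the communicator (sum of squares for Frobenius,
   per-column sums for the 1-norm, per-row sums for the infinity norm);
   NORM_2 is not available for this type.

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/
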
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatTranspose_MPIAIJ"
2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2079 {
2080   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2081   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2082   PetscErrorCode ierr;
2083   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2084   PetscInt       cstart = A->cmap->rstart,ncol;
2085   Mat            B;
2086   MatScalar      *array;
2087 
2088   PetscFunctionBegin;
2089   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"In-place transpose is only supported for square matrices");
2090 
2091   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2092   ai = Aloc->i; aj = Aloc->j;
2093   bi = Bloc->i; bj = Bloc->j;
2094   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2095     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2096     PetscSFNode          *oloc;
2097     PETSC_UNUSED PetscSF sf;
2098 
2099     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2100     /* compute d_nnz for preallocation */
2101     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2102     for (i=0; i<ai[ma]; i++) {
2103       d_nnz[aj[i]]++;
2104       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2105     }
2106     /* compute local off-diagonal contributions */
2107     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2108     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2109     /* map those to global */
2110     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2111     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2112     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2113     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2114     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2115     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2116     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2117 
2118     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2119     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2120     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2121     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2122     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2123     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2124   } else {
2125     B    = *matout;
2126     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2127     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2128   }
2129 
2130   /* copy over the A part */
2131   array = Aloc->a;
2132   row   = A->rmap->rstart;
2133   for (i=0; i<ma; i++) {
2134     ncol = ai[i+1]-ai[i];
2135     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2136     row++;
2137     array += ncol; aj += ncol;
2138   }
2139   aj = Aloc->j;
2140   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
2141 
2142   /* copy over the B part */
2143   ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2144   ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2145   array = Bloc->a;
2146   row   = A->rmap->rstart;
2147   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2148   cols_tmp = cols;
2149   for (i=0; i<mb; i++) {
2150     ncol = bi[i+1]-bi[i];
2151     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2152     row++;
2153     array += ncol; cols_tmp += ncol;
2154   }
2155   ierr = PetscFree(cols);CHKERRQ(ierr);
2156 
2157   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2158   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2159   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2160     *matout = B;
2161   } else {
2162     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2163   }
2164   PetscFunctionReturn(0);
2165 }
2166 
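/*
   Usage sketch (illustrative only): out-of-place and in-place transposes; the
   in-place form reuses the header via MatHeaderMerge() above and requires a
   square matrix.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);   // new matrix At = A^T
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);      // in-place, square A only
*/
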
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2170 {
2171   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2172   Mat            a    = aij->A,b = aij->B;
2173   PetscErrorCode ierr;
2174   PetscInt       s1,s2,s3;
2175 
2176   PetscFunctionBegin;
2177   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2178   if (rr) {
2179     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2180     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2181     /* Overlap communication with computation. */
2182     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2183   }
2184   if (ll) {
2185     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2186     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2187     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2188   }
2189   /* scale the diagonal block */
2190   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2191 
2192   if (rr) {
2193     /* Do a scatter end and then right scale the off-diagonal block */
2194     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2195     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2196   }
2197   PetscFunctionReturn(0);
2198 }
2199 
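/*
   Usage sketch (illustrative only): computes diag(ll)*A*diag(rr); ll conforms
   to the row layout and rr to the column layout, and the scatter of rr to the
   ghost entries is overlapped with scaling of the diagonal block above.

     Vec l,r;
     ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);   // r: column layout, l: row layout
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = VecDestroy(&l);CHKERRQ(ierr);
     ierr = VecDestroy(&r);CHKERRQ(ierr);
*/
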
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2203 {
2204   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2205   PetscErrorCode ierr;
2206 
2207   PetscFunctionBegin;
2208   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 #undef __FUNCT__
2213 #define __FUNCT__ "MatEqual_MPIAIJ"
2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2215 {
2216   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2217   Mat            a,b,c,d;
2218   PetscBool      flg;
2219   PetscErrorCode ierr;
2220 
2221   PetscFunctionBegin;
2222   a = matA->A; b = matA->B;
2223   c = matB->A; d = matB->B;
2224 
2225   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2226   if (flg) {
2227     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2228   }
2229   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 #undef __FUNCT__
2234 #define __FUNCT__ "MatCopy_MPIAIJ"
2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2236 {
2237   PetscErrorCode ierr;
2238   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2239   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2240 
2241   PetscFunctionBegin;
2242   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2243   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2244     /* because of the column compression in the off-processor part of the matrix a->B,
2245        the number of columns in a->B and b->B may be different, hence we cannot call
2246        MatCopy() directly on the two parts. If need be, a copy more efficient than
2247        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2248        then copying the submatrices */
2249     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2250   } else {
2251     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2252     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2253   }
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatSetUp_MPIAIJ"
2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2260 {
2261   PetscErrorCode ierr;
2262 
2263   PetscFunctionBegin;
2264   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 #undef __FUNCT__
2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2272 {
2273   PetscInt       i,m=Y->rmap->N;
2274   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2275   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2276   const PetscInt *xi = x->i,*yi = y->i;
2277 
2278   PetscFunctionBegin;
2279   /* Set the number of nonzeros in the new matrix */
2280   for (i=0; i<m; i++) {
2281     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2282     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2283     nnz[i] = 0;
2284     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2285       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2286       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2287       nnz[i]++;
2288     }
2289     for (; k<nzy; k++) nnz[i]++;
2290   }
2291   PetscFunctionReturn(0);
2292 }
2293 
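/*
   Worked example (illustrative): if row i of X has global columns {1,4,7} and
   row i of Y has {2,4}, the merge above counts the catch-up column 2, skips
   the duplicate 4, and counts each X column once, giving
   nnz[i] = |{1,2,4,7}| = 4.
*/
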
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatAXPY_MPIAIJ"
2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2297 {
2298   PetscErrorCode ierr;
2299   PetscInt       i;
2300   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2301   PetscBLASInt   bnz,one=1;
2302   Mat_SeqAIJ     *x,*y;
2303 
2304   PetscFunctionBegin;
2305   if (str == SAME_NONZERO_PATTERN) {
2306     PetscScalar alpha = a;
2307     x    = (Mat_SeqAIJ*)xx->A->data;
2308     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2309     y    = (Mat_SeqAIJ*)yy->A->data;
2310     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2311     x    = (Mat_SeqAIJ*)xx->B->data;
2312     y    = (Mat_SeqAIJ*)yy->B->data;
2313     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2314     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2315   } else if (str == SUBSET_NONZERO_PATTERN) {
2316     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2317 
2318     x = (Mat_SeqAIJ*)xx->B->data;
2319     y = (Mat_SeqAIJ*)yy->B->data;
2320     if (y->xtoy && y->XtoY != xx->B) {
2321       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2322       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2323     }
2324     if (!y->xtoy) { /* get xtoy */
2325       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2326       y->XtoY = xx->B;
2327       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2328     }
2329     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2330   } else {
2331     Mat      B;
2332     PetscInt *nnz_d,*nnz_o;
2333     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2334     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2335     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2336     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2337     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2338     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2339     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2340     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2341     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2342     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2343     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2344     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2345     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2346     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2347   }
2348   PetscFunctionReturn(0);
2349 }
2350 
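/*
   Usage sketch (illustrative only): Y = Y + a*X.  SAME_NONZERO_PATTERN reduces
   to two BLAS axpy calls on the stored values, SUBSET_NONZERO_PATTERN scatters
   X's entries through the cached xtoy map, and any other structure rebuilds Y
   with the union pattern computed by the preallocation routines above.

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/
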
2351 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2352 
2353 #undef __FUNCT__
2354 #define __FUNCT__ "MatConjugate_MPIAIJ"
2355 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2356 {
2357 #if defined(PETSC_USE_COMPLEX)
2358   PetscErrorCode ierr;
2359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2360 
2361   PetscFunctionBegin;
2362   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2363   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2364 #else
2365   PetscFunctionBegin;
2366 #endif
2367   PetscFunctionReturn(0);
2368 }
2369 
2370 #undef __FUNCT__
2371 #define __FUNCT__ "MatRealPart_MPIAIJ"
2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2373 {
2374   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2375   PetscErrorCode ierr;
2376 
2377   PetscFunctionBegin;
2378   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2379   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2386 {
2387   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2388   PetscErrorCode ierr;
2389 
2390   PetscFunctionBegin;
2391   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2392   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #if defined(PETSC_HAVE_PBGL)
2397 
2398 #include <boost/parallel/mpi/bsp_process_group.hpp>
2399 #include <boost/graph/distributed/ilu_default_graph.hpp>
2400 #include <boost/graph/distributed/ilu_0_block.hpp>
2401 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2402 #include <boost/graph/distributed/petsc/interface.hpp>
2403 #include <boost/multi_array.hpp>
2404 #include <boost/parallel/distributed_property_map.hpp>
2405 
2406 #undef __FUNCT__
2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2408 /*
2409   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2410 */
2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2412 {
2413   namespace petsc = boost::distributed::petsc;
2414 
2415   namespace graph_dist = boost::graph::distributed;
2416   using boost::graph::distributed::ilu_default::process_group_type;
2417   using boost::graph::ilu_permuted;
2418 
2419   PetscBool      row_identity, col_identity;
2420   PetscContainer c;
2421   PetscInt       m, n, M, N;
2422   PetscErrorCode ierr;
2423 
2424   PetscFunctionBegin;
2425   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2426   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2427   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2428   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2429 
2430   process_group_type pg;
2431   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2432   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2433   lgraph_type& level_graph = *lgraph_p;
2434   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2435 
2436   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2437   ilu_permuted(level_graph);
2438 
2439   /* put together the new matrix */
2440   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2441   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2442   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2443   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2444   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2445   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2446   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2447   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2448 
2449   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2450   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2451   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2452   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 #undef __FUNCT__
2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2459 {
2460   PetscFunctionBegin;
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatSolve_MPIAIJ"
2466 /*
2467   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2468 */
2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2470 {
2471   namespace graph_dist = boost::graph::distributed;
2472 
2473   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2474   lgraph_type    *lgraph_p;
2475   PetscContainer c;
2476   PetscErrorCode ierr;
2477 
2478   PetscFunctionBegin;
2479   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2480   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2481   ierr = VecCopy(b, x);CHKERRQ(ierr);
2482 
2483   PetscScalar *array_x;
2484   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2485   PetscInt sx;
2486   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2487 
2488   PetscScalar *array_b;
2489   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2490   PetscInt sb;
2491   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2492 
2493   lgraph_type& level_graph = *lgraph_p;
2494   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2495 
2496   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2497   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2498   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2499 
2500   typedef boost::iterator_property_map<array_ref_type::iterator,
2501                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2502   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2503   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2504 
2505   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2506   PetscFunctionReturn(0);
2507 }
2508 #endif
2509 
2510 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2511   PetscInt    nzlocal,nsends,nrecvs;
2512   PetscMPIInt *send_rank,*recv_rank;
2513   PetscInt    *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2514   PetscScalar *sbuf_a,**rbuf_a;
2515   PetscErrorCode (*Destroy)(Mat);
2516 } Mat_Redundant;
2517 
2518 #undef __FUNCT__
2519 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2520 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2521 {
2522   PetscErrorCode ierr;
2523   Mat_Redundant  *redund=(Mat_Redundant*)ptr;
2524   PetscInt       i;
2525 
2526   PetscFunctionBegin;
2527   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2528   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2529   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2530   for (i=0; i<redund->nrecvs; i++) {
2531     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2532     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2533   }
2534   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2535   ierr = PetscFree(redund);CHKERRQ(ierr);
2536   PetscFunctionReturn(0);
2537 }
2538 
2539 #undef __FUNCT__
2540 #define __FUNCT__ "MatDestroy_MatRedundant"
2541 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2542 {
2543   PetscErrorCode ierr;
2544   PetscContainer container;
2545   Mat_Redundant  *redund=NULL;
2546 
2547   PetscFunctionBegin;
2548   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
2549   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2550   ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
2551 
2552   A->ops->destroy = redund->Destroy;
2553 
2554   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2555   if (A->ops->destroy) {
2556     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2557   }
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 #undef __FUNCT__
2562 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2563 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2564 {
2565   PetscMPIInt    rank,size;
2566   MPI_Comm       comm;
2567   PetscErrorCode ierr;
2568   PetscInt       nsends    = 0,nrecvs=0,i,rownz_max=0;
2569   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL;
2570   PetscInt       *rowrange = mat->rmap->range;
2571   Mat_MPIAIJ     *aij      = (Mat_MPIAIJ*)mat->data;
2572   Mat            A         = aij->A,B=aij->B,C=*matredundant;
2573   Mat_SeqAIJ     *a        = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2574   PetscScalar    *sbuf_a;
2575   PetscInt       nzlocal=a->nz+b->nz;
2576   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2577   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2578   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2579   MatScalar      *aworkA,*aworkB;
2580   PetscScalar    *vals;
2581   PetscMPIInt    tag1,tag2,tag3,imdex;
2582   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2583   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2584   MPI_Status     recv_status,*send_status;
2585   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2586   PetscInt       **rbuf_j=NULL;
2587   PetscScalar    **rbuf_a=NULL;
2588   Mat_Redundant  *redund =NULL;
2589   PetscContainer container;
2590 
2591   PetscFunctionBegin;
2592   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2593   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2594   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2595 
2596   if (reuse == MAT_REUSE_MATRIX) {
2597     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2598     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2599     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2600     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2601     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
2602     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2603     ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
2604     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2605 
2606     nsends    = redund->nsends;
2607     nrecvs    = redund->nrecvs;
2608     send_rank = redund->send_rank;
2609     recv_rank = redund->recv_rank;
2610     sbuf_nz   = redund->sbuf_nz;
2611     rbuf_nz   = redund->rbuf_nz;
2612     sbuf_j    = redund->sbuf_j;
2613     sbuf_a    = redund->sbuf_a;
2614     rbuf_j    = redund->rbuf_j;
2615     rbuf_a    = redund->rbuf_a;
2616   }
2617 
2618   if (reuse == MAT_INITIAL_MATRIX) {
2619     PetscMPIInt subrank,subsize;
2620     PetscInt    nleftover,np_subcomm;
2621     /* get the destination processors' id send_rank, nsends and nrecvs */
2622     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2623     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2624     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2625 
2626     np_subcomm = size/nsubcomm;
2627     nleftover  = size - nsubcomm*np_subcomm;
2628 
2629     nsends = 0; nrecvs = 0;
2630     for (i=0; i<size; i++) { /* i = rank */
2631       if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */
2632         send_rank[nsends]   = i; nsends++;
2633         recv_rank[nrecvs++] = i;
2634       }
2635     }
2636     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2637       i = size-nleftover-1;
2638       j = 0;
2639       while (j < nsubcomm - nleftover) {
2640         send_rank[nsends++] = i;
2641         i--; j++;
2642       }
2643     }
2644 
2645     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2646       for (i=0; i<nleftover; i++) {
2647         recv_rank[nrecvs++] = size-nleftover+i;
2648       }
2649     }
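    /* Worked example (editorial, assuming interlaced subcommunicators):
       size=6, nsubcomm=2 gives np_subcomm=3 and nleftover=0; rank r has
       subrank r/2, so ranks {0,1}, {2,3}, {4,5} exchange their local rows and
       each 3-process subcommunicator assembles a full copy of mat. */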
2650 
2651     /* allocate sbuf_j, sbuf_a */
2652     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2653     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2654     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2655   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
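
  /* sbuf_j layout: (rend-rstart+1) row pointers followed by the nzlocal global
     column indices; each row is written as lower off-diagonal, then diagonal
     (shifted by cstart), then upper off-diagonal entries, so its columns end
     up sorted by global index */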
2656 
2657   /* copy mat's local entries into the buffers */
2658   if (reuse == MAT_INITIAL_MATRIX) {
2659     rownz_max = 0;
2660     rptr      = sbuf_j;
2661     cols      = sbuf_j + rend-rstart + 1;
2662     vals      = sbuf_a;
2663     rptr[0]   = 0;
2664     for (i=0; i<rend-rstart; i++) {
2665       row    = i + rstart;
2666       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2667       ncols  = nzA + nzB;
2668       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2669       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2670       /* load the column indices for this row into cols */
2671       lwrite = 0;
2672       for (l=0; l<nzB; l++) {
2673         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2674           vals[lwrite]   = aworkB[l];
2675           cols[lwrite++] = ctmp;
2676         }
2677       }
2678       for (l=0; l<nzA; l++) {
2679         vals[lwrite]   = aworkA[l];
2680         cols[lwrite++] = cstart + cworkA[l];
2681       }
2682       for (l=0; l<nzB; l++) {
2683         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2684           vals[lwrite]   = aworkB[l];
2685           cols[lwrite++] = ctmp;
2686         }
2687       }
2688       vals     += ncols;
2689       cols     += ncols;
2690       rptr[i+1] = rptr[i] + ncols;
2691       if (rownz_max < ncols) rownz_max = ncols;
2692     }
2693     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2694   } else { /* only copy matrix values into sbuf_a */
2695     rptr    = sbuf_j;
2696     vals    = sbuf_a;
2697     rptr[0] = 0;
2698     for (i=0; i<rend-rstart; i++) {
2699       row    = i + rstart;
2700       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2701       ncols  = nzA + nzB;
2702       cworkB = b->j + b->i[i];
2703       aworkA = a->a + a->i[i];
2704       aworkB = b->a + b->i[i];
2705       lwrite = 0;
2706       for (l=0; l<nzB; l++) {
2707         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2708       }
2709       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2710       for (l=0; l<nzB; l++) {
2711         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2712       }
2713       vals     += ncols;
2714       rptr[i+1] = rptr[i] + ncols;
2715     }
2716   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2717 
2718   /* send nzlocal to others, and recv other's nzlocal */
2719   /*--------------------------------------------------*/
2720   if (reuse == MAT_INITIAL_MATRIX) {
2721     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2722 
2723     s_waits2 = s_waits3 + nsends;
2724     s_waits1 = s_waits2 + nsends;
2725     r_waits1 = s_waits1 + nsends;
2726     r_waits2 = r_waits1 + nrecvs;
2727     r_waits3 = r_waits2 + nrecvs;
2728   } else {
2729     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2730 
2731     r_waits3 = s_waits3 + nsends;
2732   }
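
  /* three-phase exchange: tag1 carries each sender's nzlocal (message sizes),
     tag2 the row pointers plus column indices (sbuf_j), and tag3 the numerical
     values (sbuf_a); on MAT_REUSE_MATRIX only the tag3 messages are repeated */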
2733 
2734   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2735   if (reuse == MAT_INITIAL_MATRIX) {
2736     /* get new tags to keep the communication clean */
2737     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2738     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2739     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2740 
2741     /* post receives of other's nzlocal */
2742     for (i=0; i<nrecvs; i++) {
2743       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2744     }
2745     /* send nzlocal to others */
2746     for (i=0; i<nsends; i++) {
2747       sbuf_nz[i] = nzlocal;
2748       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2749     }
2750     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2751     count = nrecvs;
2752     while (count) {
2753       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2754 
2755       recv_rank[imdex] = recv_status.MPI_SOURCE;
2756       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2757       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2758 
2759       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2760 
2761       rbuf_nz[imdex] += i + 2;
2762 
2763       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2764       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2765       count--;
2766     }
2767     /* wait on sends of nzlocal */
2768     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2769     /* send mat->i,j to others, and receive from others */
2770     /*------------------------------------------------*/
2771     for (i=0; i<nsends; i++) {
2772       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2773       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2774     }
2775     /* wait on receives of mat->i,j */
2776     /*------------------------------*/
2777     count = nrecvs;
2778     while (count) {
2779       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2780       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2781       count--;
2782     }
2783     /* wait on sends of mat->i,j */
2784     /*---------------------------*/
2785     if (nsends) {
2786       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2787     }
2788   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2789 
2790   /* post receives, send and receive mat->a */
2791   /*----------------------------------------*/
2792   for (imdex=0; imdex<nrecvs; imdex++) {
2793     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2794   }
2795   for (i=0; i<nsends; i++) {
2796     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2797   }
2798   count = nrecvs;
2799   while (count) {
2800     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2801     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2802     count--;
2803   }
2804   if (nsends) {
2805     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2806   }
2807 
2808   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2809 
2810   /* create redundant matrix */
2811   /*-------------------------*/
2812   if (reuse == MAT_INITIAL_MATRIX) {
2813     /* compute rownz_max for preallocation */
2814     for (imdex=0; imdex<nrecvs; imdex++) {
2815       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2816       rptr = rbuf_j[imdex];
2817       for (i=0; i<j; i++) {
2818         ncols = rptr[i+1] - rptr[i];
2819         if (rownz_max < ncols) rownz_max = ncols;
2820       }
2821     }
2822 
2823     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2824     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2825     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2826     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2827     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2828     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2829   } else {
2830     C = *matredundant;
2831   }
2832 
2833   /* insert local matrix entries */
2834   rptr = sbuf_j;
2835   cols = sbuf_j + rend-rstart + 1;
2836   vals = sbuf_a;
2837   for (i=0; i<rend-rstart; i++) {
2838     row   = i + rstart;
2839     ncols = rptr[i+1] - rptr[i];
2840     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2841     vals += ncols;
2842     cols += ncols;
2843   }
2844   /* insert received matrix entries */
2845   for (imdex=0; imdex<nrecvs; imdex++) {
2846     rstart = rowrange[recv_rank[imdex]];
2847     rend   = rowrange[recv_rank[imdex]+1];
2848     rptr   = rbuf_j[imdex];
2849     cols   = rbuf_j[imdex] + rend-rstart + 1;
2850     vals   = rbuf_a[imdex];
2851     for (i=0; i<rend-rstart; i++) {
2852       row   = i + rstart;
2853       ncols = rptr[i+1] - rptr[i];
2854       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2855       vals += ncols;
2856       cols += ncols;
2857     }
2858   }
2859   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2860   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2861   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2862   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %D != input mat size %D",M,mat->rmap->N);
2863   if (reuse == MAT_INITIAL_MATRIX) {
2864     PetscContainer container;
2865     *matredundant = C;
2866     /* create a supporting struct and attach it to C for reuse */
2867     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2868     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2869     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2870     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2871     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2872     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2873 
2874     redund->nzlocal   = nzlocal;
2875     redund->nsends    = nsends;
2876     redund->nrecvs    = nrecvs;
2877     redund->send_rank = send_rank;
2878     redund->recv_rank = recv_rank;
2879     redund->sbuf_nz   = sbuf_nz;
2880     redund->rbuf_nz   = rbuf_nz;
2881     redund->sbuf_j    = sbuf_j;
2882     redund->sbuf_a    = sbuf_a;
2883     redund->rbuf_j    = rbuf_j;
2884     redund->rbuf_a    = rbuf_a;
2885 
2886     redund->Destroy = C->ops->destroy;
2887     C->ops->destroy = MatDestroy_MatRedundant;
2888   }
2889   PetscFunctionReturn(0);
2890 }
2891 
2892 #undef __FUNCT__
2893 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2894 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2895 {
2896   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2897   PetscErrorCode ierr;
2898   PetscInt       i,*idxb = 0;
2899   PetscScalar    *va,*vb;
2900   Vec            vtmp;
2901 
2902   PetscFunctionBegin;
2903   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2904   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2905   if (idx) {
2906     for (i=0; i<A->rmap->n; i++) {
2907       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2908     }
2909   }
2910 
2911   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2912   if (idx) {
2913     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2914   }
2915   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2916   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2917 
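  /* keep the larger magnitude of the diagonal-block and off-diagonal-block row
     maxima; garray translates B's local column numbers back to global ones */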
2918   for (i=0; i<A->rmap->n; i++) {
2919     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2920       va[i] = vb[i];
2921       if (idx) idx[i] = a->garray[idxb[i]];
2922     }
2923   }
2924 
2925   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2926   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2927   ierr = PetscFree(idxb);CHKERRQ(ierr);
2928   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2929   PetscFunctionReturn(0);
2930 }
2931 
2932 #undef __FUNCT__
2933 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2934 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2935 {
2936   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2937   PetscErrorCode ierr;
2938   PetscInt       i,*idxb = 0;
2939   PetscScalar    *va,*vb;
2940   Vec            vtmp;
2941 
2942   PetscFunctionBegin;
2943   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2944   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2945   if (idx) {
2946     for (i=0; i<A->cmap->n; i++) {
2947     for (i=0; i<A->rmap->n; i++) {
2948     }
2949   }
2950 
2951   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2952   if (idx) {
2953     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2954   }
2955   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2956   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2957 
2958   for (i=0; i<A->rmap->n; i++) {
2959     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2960       va[i] = vb[i];
2961       if (idx) idx[i] = a->garray[idxb[i]];
2962     }
2963   }
2964 
2965   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2966   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2967   ierr = PetscFree(idxb);CHKERRQ(ierr);
2968   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2969   PetscFunctionReturn(0);
2970 }
2971 
2972 #undef __FUNCT__
2973 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2974 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2975 {
2976   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2977   PetscInt       n      = A->rmap->n;
2978   PetscInt       cstart = A->cmap->rstart;
2979   PetscInt       *cmap  = mat->garray;
2980   PetscInt       *diagIdx, *offdiagIdx;
2981   Vec            diagV, offdiagV;
2982   PetscScalar    *a, *diagA, *offdiagA;
2983   PetscInt       r;
2984   PetscErrorCode ierr;
2985 
2986   PetscFunctionBegin;
2987   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2988   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2989   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2990   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2991   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2992   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2993   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2994   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2995   for (r = 0; r < n; ++r) {
2996     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2997       a[r]   = diagA[r];
2998       idx[r] = cstart + diagIdx[r];
2999     } else {
3000       a[r]   = offdiagA[r];
3001       idx[r] = cmap[offdiagIdx[r]];
3002     }
3003   }
3004   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3005   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3006   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3007   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3008   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3009   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3010   PetscFunctionReturn(0);
3011 }
3012 
3013 #undef __FUNCT__
3014 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3015 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3016 {
3017   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3018   PetscInt       n      = A->rmap->n;
3019   PetscInt       cstart = A->cmap->rstart;
3020   PetscInt       *cmap  = mat->garray;
3021   PetscInt       *diagIdx, *offdiagIdx;
3022   Vec            diagV, offdiagV;
3023   PetscScalar    *a, *diagA, *offdiagA;
3024   PetscInt       r;
3025   PetscErrorCode ierr;
3026 
3027   PetscFunctionBegin;
3028   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3029   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3030   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3031   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3032   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3033   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3034   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3035   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3036   for (r = 0; r < n; ++r) {
3037     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3038       a[r]   = diagA[r];
3039       idx[r] = cstart + diagIdx[r];
3040     } else {
3041       a[r]   = offdiagA[r];
3042       idx[r] = cmap[offdiagIdx[r]];
3043     }
3044   }
3045   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3046   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3047   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3048   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3049   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3050   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3051   PetscFunctionReturn(0);
3052 }
3053 
3054 #undef __FUNCT__
3055 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3056 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3057 {
3058   PetscErrorCode ierr;
3059   Mat            *dummy;
3060 
3061   PetscFunctionBegin;
3062   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3063   *newmat = *dummy;
3064   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3065   PetscFunctionReturn(0);
3066 }
3067 
3068 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3069 
3070 #undef __FUNCT__
3071 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3072 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3073 {
3074   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3075   PetscErrorCode ierr;
3076 
3077   PetscFunctionBegin;
3078   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3079   PetscFunctionReturn(0);
3080 }
3081 
3082 #undef __FUNCT__
3083 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3084 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3085 {
3086   PetscErrorCode ierr;
3087   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3088 
3089   PetscFunctionBegin;
3090   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3091   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3092   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3093   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3094   PetscFunctionReturn(0);
3095 }
3096 
3097 /* -------------------------------------------------------------------*/
3098 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3099                                        MatGetRow_MPIAIJ,
3100                                        MatRestoreRow_MPIAIJ,
3101                                        MatMult_MPIAIJ,
3102                                 /* 4*/ MatMultAdd_MPIAIJ,
3103                                        MatMultTranspose_MPIAIJ,
3104                                        MatMultTransposeAdd_MPIAIJ,
3105 #if defined(PETSC_HAVE_PBGL)
3106                                        MatSolve_MPIAIJ,
3107 #else
3108                                        0,
3109 #endif
3110                                        0,
3111                                        0,
3112                                 /*10*/ 0,
3113                                        0,
3114                                        0,
3115                                        MatSOR_MPIAIJ,
3116                                        MatTranspose_MPIAIJ,
3117                                 /*15*/ MatGetInfo_MPIAIJ,
3118                                        MatEqual_MPIAIJ,
3119                                        MatGetDiagonal_MPIAIJ,
3120                                        MatDiagonalScale_MPIAIJ,
3121                                        MatNorm_MPIAIJ,
3122                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3123                                        MatAssemblyEnd_MPIAIJ,
3124                                        MatSetOption_MPIAIJ,
3125                                        MatZeroEntries_MPIAIJ,
3126                                 /*24*/ MatZeroRows_MPIAIJ,
3127                                        0,
3128 #if defined(PETSC_HAVE_PBGL)
3129                                        0,
3130 #else
3131                                        0,
3132 #endif
3133                                        0,
3134                                        0,
3135                                 /*29*/ MatSetUp_MPIAIJ,
3136 #if defined(PETSC_HAVE_PBGL)
3137                                        0,
3138 #else
3139                                        0,
3140 #endif
3141                                        0,
3142                                        0,
3143                                        0,
3144                                 /*34*/ MatDuplicate_MPIAIJ,
3145                                        0,
3146                                        0,
3147                                        0,
3148                                        0,
3149                                 /*39*/ MatAXPY_MPIAIJ,
3150                                        MatGetSubMatrices_MPIAIJ,
3151                                        MatIncreaseOverlap_MPIAIJ,
3152                                        MatGetValues_MPIAIJ,
3153                                        MatCopy_MPIAIJ,
3154                                 /*44*/ MatGetRowMax_MPIAIJ,
3155                                        MatScale_MPIAIJ,
3156                                        0,
3157                                        0,
3158                                        MatZeroRowsColumns_MPIAIJ,
3159                                 /*49*/ MatSetRandom_MPIAIJ,
3160                                        0,
3161                                        0,
3162                                        0,
3163                                        0,
3164                                 /*54*/ MatFDColoringCreate_MPIAIJ,
3165                                        0,
3166                                        MatSetUnfactored_MPIAIJ,
3167                                        MatPermute_MPIAIJ,
3168                                        0,
3169                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3170                                        MatDestroy_MPIAIJ,
3171                                        MatView_MPIAIJ,
3172                                        0,
3173                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3174                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3175                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3176                                        0,
3177                                        0,
3178                                        0,
3179                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3180                                        MatGetRowMinAbs_MPIAIJ,
3181                                        0,
3182                                        MatSetColoring_MPIAIJ,
3183                                        0,
3184                                        MatSetValuesAdifor_MPIAIJ,
3185                                 /*75*/ MatFDColoringApply_AIJ,
3186                                        0,
3187                                        0,
3188                                        0,
3189                                        MatFindZeroDiagonals_MPIAIJ,
3190                                 /*80*/ 0,
3191                                        0,
3192                                        0,
3193                                 /*83*/ MatLoad_MPIAIJ,
3194                                        0,
3195                                        0,
3196                                        0,
3197                                        0,
3198                                        0,
3199                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3200                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3201                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3202                                        MatPtAP_MPIAIJ_MPIAIJ,
3203                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3204                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3205                                        0,
3206                                        0,
3207                                        0,
3208                                        0,
3209                                 /*99*/ 0,
3210                                        0,
3211                                        0,
3212                                        MatConjugate_MPIAIJ,
3213                                        0,
3214                                 /*104*/MatSetValuesRow_MPIAIJ,
3215                                        MatRealPart_MPIAIJ,
3216                                        MatImaginaryPart_MPIAIJ,
3217                                        0,
3218                                        0,
3219                                 /*109*/0,
3220                                        MatGetRedundantMatrix_MPIAIJ,
3221                                        MatGetRowMin_MPIAIJ,
3222                                        0,
3223                                        0,
3224                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3225                                        0,
3226                                        0,
3227                                        0,
3228                                        0,
3229                                 /*119*/0,
3230                                        0,
3231                                        0,
3232                                        0,
3233                                        MatGetMultiProcBlock_MPIAIJ,
3234                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3235                                        MatGetColumnNorms_MPIAIJ,
3236                                        MatInvertBlockDiagonal_MPIAIJ,
3237                                        0,
3238                                        MatGetSubMatricesParallel_MPIAIJ,
3239                                 /*129*/0,
3240                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3241                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3242                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3243                                        0,
3244                                 /*134*/0,
3245                                        0,
3246                                        0,
3247                                        0,
3248                                        0,
3249                                 /*139*/0,
3250                                        0
3251 };
3252 
3253 /* ----------------------------------------------------------------------------------------*/
3254 
3255 #undef __FUNCT__
3256 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3257 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3258 {
3259   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3260   PetscErrorCode ierr;
3261 
3262   PetscFunctionBegin;
3263   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3264   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3265   PetscFunctionReturn(0);
3266 }
3267 
3268 #undef __FUNCT__
3269 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3270 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3271 {
3272   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3273   PetscErrorCode ierr;
3274 
3275   PetscFunctionBegin;
3276   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3277   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3278   PetscFunctionReturn(0);
3279 }
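
/*
   Typical use of the pair above (editorial sketch; requires that the nonzero
   pattern stays fixed):

      MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);
      MatStoreValues(mat);
      ... recompute entries with MatSetValues()/MatAssemblyBegin/End() ...
      MatRetrieveValues(mat);
*/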
3280 
3281 #undef __FUNCT__
3282 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3283 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3284 {
3285   Mat_MPIAIJ     *b;
3286   PetscErrorCode ierr;
3287 
3288   PetscFunctionBegin;
3289   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3290   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3291   b = (Mat_MPIAIJ*)B->data;
3292 
3293   if (!B->preallocated) {
3294     /* Explicitly create 2 MATSEQAIJ matrices. */
3295     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3296     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3297     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3298     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3299     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3300     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3301     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3302     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3303     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3304     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3305   }
3306 
3307   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3308   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3309   B->preallocated = PETSC_TRUE;
3310   PetscFunctionReturn(0);
3311 }
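
/*
   Design note (editorial): the "diagonal" block b->A is the local square
   n x n piece, while b->B is created with the full global column width;
   during assembly B's columns are compacted to just the nonzero ones and
   garray records their global indices.
*/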
3312 
3313 #undef __FUNCT__
3314 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3315 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3316 {
3317   Mat            mat;
3318   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3319   PetscErrorCode ierr;
3320 
3321   PetscFunctionBegin;
3322   *newmat = 0;
3323   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3324   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3325   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3326   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3327   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3328   a       = (Mat_MPIAIJ*)mat->data;
3329 
3330   mat->factortype   = matin->factortype;
3331   mat->rmap->bs     = matin->rmap->bs;
3332   mat->cmap->bs     = matin->cmap->bs;
3333   mat->assembled    = PETSC_TRUE;
3334   mat->insertmode   = NOT_SET_VALUES;
3335   mat->preallocated = PETSC_TRUE;
3336 
3337   a->size         = oldmat->size;
3338   a->rank         = oldmat->rank;
3339   a->donotstash   = oldmat->donotstash;
3340   a->roworiented  = oldmat->roworiented;
3341   a->rowindices   = 0;
3342   a->rowvalues    = 0;
3343   a->getrowactive = PETSC_FALSE;
3344 
3345   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3346   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3347 
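  /* colmap is the global-to-local column lookup for the off-diagonal block:
     a PetscTable under PETSC_USE_CTABLE, otherwise a dense array of length
     cmap->N */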
3348   if (oldmat->colmap) {
3349 #if defined(PETSC_USE_CTABLE)
3350     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3351 #else
3352     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3353     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3354     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3355 #endif
3356   } else a->colmap = 0;
3357   if (oldmat->garray) {
3358     PetscInt len;
3359     len  = oldmat->B->cmap->n;
3360     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3361     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3362     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3363   } else a->garray = 0;
3364 
3365   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3366   ierr    = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3367   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3368   ierr    = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3369   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3370   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3371   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3372   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3373   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3374   *newmat = mat;
3375   PetscFunctionReturn(0);
3376 }
3377 
3378 
3379 
3380 #undef __FUNCT__
3381 #define __FUNCT__ "MatLoad_MPIAIJ"
3382 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3383 {
3384   PetscScalar    *vals,*svals;
3385   MPI_Comm       comm;
3386   PetscErrorCode ierr;
3387   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3388   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3389   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3390   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3391   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3392   int            fd;
3393   PetscInt       bs = 1;
3394 
3395   PetscFunctionBegin;
3396   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3397   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3398   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3399   if (!rank) {
3400     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3401     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3402     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3403   }
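
  /* binary layout read below: header = {MAT_FILE_CLASSID, M, N, nz}, then the
     M row lengths (PetscInt), the nz column indices (PetscInt), and the nz
     values (PetscScalar); only rank 0 reads the file and ships pieces out */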
3404 
3405   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3406   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3407   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3408 
3409   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3410 
3411   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3412   M    = header[1]; N = header[2];
3413   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3414   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3415   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3416 
3417   /* If global sizes are set, check if they are consistent with that given in the file */
3418   if (sizesset) {
3419     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3420   }
3421   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3422   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3423 
3424   /* determine ownership of all (block) rows */
3425   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
3426   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3427   else m = newMat->rmap->n; /* Set by user */
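  /* e.g. (illustrative): M=10, bs=1, size=3 yields m = 4,3,3 on ranks 0,1,2 */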
3428 
3429   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3430   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3431 
3432   /* First process needs enough room for process with most rows */
3433   if (!rank) {
3434     mmax = rowners[1];
3435     for (i=2; i<=size; i++) {
3436       mmax = PetscMax(mmax, rowners[i]);
3437     }
3438   } else mmax = -1;             /* unused, but compilers complain */
3439 
3440   rowners[0] = 0;
3441   for (i=2; i<=size; i++) {
3442     rowners[i] += rowners[i-1];
3443   }
3444   rstart = rowners[rank];
3445   rend   = rowners[rank+1];
3446 
3447   /* distribute row lengths to all processors */
3448   ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
3449   if (!rank) {
3450     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3451     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3452     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3453     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3454     for (j=0; j<m; j++) {
3455       procsnz[0] += ourlens[j];
3456     }
3457     for (i=1; i<size; i++) {
3458       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3459       /* calculate the number of nonzeros on each processor */
3460       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3461         procsnz[i] += rowlengths[j];
3462       }
3463       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3464     }
3465     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3466   } else {
3467     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3468   }
3469 
3470   if (!rank) {
3471     /* determine max buffer needed and allocate it */
3472     maxnz = 0;
3473     for (i=0; i<size; i++) {
3474       maxnz = PetscMax(maxnz,procsnz[i]);
3475     }
3476     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3477 
3478     /* read in my part of the matrix column indices  */
3479     nz   = procsnz[0];
3480     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3481     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3482 
3483     /* read in everyone else's part of the matrix and ship it off */
3484     for (i=1; i<size; i++) {
3485       nz   = procsnz[i];
3486       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3487       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3488     }
3489     ierr = PetscFree(cols);CHKERRQ(ierr);
3490   } else {
3491     /* determine buffer space needed for message */
3492     nz = 0;
3493     for (i=0; i<m; i++) {
3494       nz += ourlens[i];
3495     }
3496     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3497 
3498     /* receive message of column indices*/
3499     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3500   }
3501 
3502   /* determine column ownership if matrix is not square */
3503   if (N != M) {
3504     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3505     else n = newMat->cmap->n;
3506     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3507     cstart = cend - n;
3508   } else {
3509     cstart = rstart;
3510     cend   = rend;
3511     n      = cend - cstart;
3512   }
3513 
3514   /* loop over local rows, determining number of off diagonal entries */
3515   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3516   jj   = 0;
3517   for (i=0; i<m; i++) {
3518     for (j=0; j<ourlens[i]; j++) {
3519       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3520       jj++;
3521     }
3522   }
3523 
3524   for (i=0; i<m; i++) {
3525     ourlens[i] -= offlens[i];
3526   }
3527   if (!sizesset) {
3528     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3529   }
3530 
3531   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3532 
3533   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3534 
3535   for (i=0; i<m; i++) {
3536     ourlens[i] += offlens[i];
3537   }
3538 
3539   if (!rank) {
3540     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3541 
3542     /* read in my part of the matrix numerical values  */
3543     nz   = procsnz[0];
3544     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3545 
3546     /* insert into matrix */
3547     jj      = rstart;
3548     smycols = mycols;
3549     svals   = vals;
3550     for (i=0; i<m; i++) {
3551       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3552       smycols += ourlens[i];
3553       svals   += ourlens[i];
3554       jj++;
3555     }
3556 
3557     /* read in other processors and ship out */
3558     for (i=1; i<size; i++) {
3559       nz   = procsnz[i];
3560       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3561       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3562     }
3563     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3564   } else {
3565     /* receive numeric values */
3566     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3567 
3568     /* receive message of values*/
3569     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3570 
3571     /* insert into matrix */
3572     jj      = rstart;
3573     smycols = mycols;
3574     svals   = vals;
3575     for (i=0; i<m; i++) {
3576       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3577       smycols += ourlens[i];
3578       svals   += ourlens[i];
3579       jj++;
3580     }
3581   }
3582   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3583   ierr = PetscFree(vals);CHKERRQ(ierr);
3584   ierr = PetscFree(mycols);CHKERRQ(ierr);
3585   ierr = PetscFree(rowners);CHKERRQ(ierr);
3586   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3587   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3588   PetscFunctionReturn(0);
3589 }
3590 
3591 #undef __FUNCT__
3592 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3593 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3594 {
3595   PetscErrorCode ierr;
3596   IS             iscol_local;
3597   PetscInt       csize;
3598 
3599   PetscFunctionBegin;
3600   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3601   if (call == MAT_REUSE_MATRIX) {
3602     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3603     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3604   } else {
3605     PetscInt cbs;
3606     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3607     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3608     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3609   }
3610   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3611   if (call == MAT_INITIAL_MATRIX) {
3612     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3613     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3614   }
3615   PetscFunctionReturn(0);
3616 }
3617 
3618 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3619 #undef __FUNCT__
3620 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3621 /*
3622     Not great since it makes two copies of the submatrix: first a SeqAIJ
3623   locally, and then the end result by concatenating the local pieces.
3624   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3625 
3626   Note: This requires a sequential iscol with all indices.
3627 */
3628 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3629 {
3630   PetscErrorCode ierr;
3631   PetscMPIInt    rank,size;
3632   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3633   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3634   PetscBool      allcolumns, colflag;
3635   Mat            M,Mreuse;
3636   MatScalar      *vwork,*aa;
3637   MPI_Comm       comm;
3638   Mat_SeqAIJ     *aij;
3639 
3640   PetscFunctionBegin;
3641   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3642   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3643   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3644 
3645   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3646   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3647   if (colflag && ncol == mat->cmap->N) {
3648     allcolumns = PETSC_TRUE;
3649   } else {
3650     allcolumns = PETSC_FALSE;
3651   }
3652   if (call ==  MAT_REUSE_MATRIX) {
3653     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3654     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3655     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3656   } else {
3657     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3658   }
3659 
3660   /*
3661       m - number of local rows
3662       n - number of columns (same on all processors)
3663       rstart - first row in new global matrix generated
3664   */
3665   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3666   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3667   if (call == MAT_INITIAL_MATRIX) {
3668     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3669     ii  = aij->i;
3670     jj  = aij->j;
3671 
3672     /*
3673         Determine the number of non-zeros in the diagonal and off-diagonal
3674         portions of the matrix in order to do correct preallocation
3675     */
3676 
3677     /* first get start and end of "diagonal" columns */
3678     if (csize == PETSC_DECIDE) {
3679       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3680       if (mglobal == n) { /* square matrix */
3681         nlocal = m;
3682       } else {
3683         nlocal = n/size + ((n % size) > rank);
3684       }
3685     } else {
3686       nlocal = csize;
3687     }
3688     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3689     rstart = rend - nlocal;
3690     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3691 
3692     /* next, compute all the lengths */
3693     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3694     olens = dlens + m;
3695     for (i=0; i<m; i++) {
3696       jend = ii[i+1] - ii[i];
3697       olen = 0;
3698       dlen = 0;
3699       for (j=0; j<jend; j++) {
3700         if (*jj < rstart || *jj >= rend) olen++;
3701         else dlen++;
3702         jj++;
3703       }
3704       olens[i] = olen;
3705       dlens[i] = dlen;
3706     }
3707     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3708     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3709     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3710     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3711     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3712     ierr = PetscFree(dlens);CHKERRQ(ierr);
3713   } else {
3714     PetscInt ml,nl;
3715 
3716     M    = *newmat;
3717     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3718     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3719     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3720     /*
3721          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3722        rather than the slower MatSetValues().
3723     */
3724     M->was_assembled = PETSC_TRUE;
3725     M->assembled     = PETSC_FALSE;
3726   }
3727   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3728   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3729   ii   = aij->i;
3730   jj   = aij->j;
3731   aa   = aij->a;
3732   for (i=0; i<m; i++) {
3733     row   = rstart + i;
3734     nz    = ii[i+1] - ii[i];
3735     cwork = jj;     jj += nz;
3736     vwork = aa;     aa += nz;
3737     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3738   }
3739 
3740   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3741   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3742   *newmat = M;
3743 
3744   /* save submatrix used in processor for next request */
3745   if (call ==  MAT_INITIAL_MATRIX) {
3746     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3747     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3748   }
3749   PetscFunctionReturn(0);
3750 }
3751 
3752 #undef __FUNCT__
3753 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3754 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3755 {
3756   PetscInt       m,cstart, cend,j,nnz,i,d;
3757   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3758   const PetscInt *JJ;
3759   PetscScalar    *values;
3760   PetscErrorCode ierr;
3761 
3762   PetscFunctionBegin;
3763   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3764 
3765   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3766   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3767   m      = B->rmap->n;
3768   cstart = B->cmap->rstart;
3769   cend   = B->cmap->rend;
3770   rstart = B->rmap->rstart;
3771 
3772   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3773 
3774 #if defined(PETSC_USE_DEBUG)
3775   for (i=0; i<m; i++) {
3776     nnz = Ii[i+1]- Ii[i];
3777     JJ  = J + Ii[i];
3778     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3779     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3780     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3781   }
3782 #endif
3783 
3784   for (i=0; i<m; i++) {
3785     nnz     = Ii[i+1]- Ii[i];
3786     JJ      = J + Ii[i];
3787     nnz_max = PetscMax(nnz_max,nnz);
3788     d       = 0;
3789     for (j=0; j<nnz; j++) {
3790       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3791     }
3792     d_nnz[i] = d;
3793     o_nnz[i] = nnz - d;
3794   }
3795   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3796   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3797 
3798   if (v) values = (PetscScalar*)v;
3799   else {
3800     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3801     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3802   }
3803 
3804   for (i=0; i<m; i++) {
3805     ii   = i + rstart;
3806     nnz  = Ii[i+1]- Ii[i];
3807     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3808   }
3809   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3810   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3811 
3812   if (!v) {
3813     ierr = PetscFree(values);CHKERRQ(ierr);
3814   }
3815   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3816   PetscFunctionReturn(0);
3817 }
3818 
3819 #undef __FUNCT__
3820 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3821 /*@
3822    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3823    (the default parallel PETSc format).
3824 
3825    Collective on MPI_Comm
3826 
3827    Input Parameters:
3828 +  B - the matrix
3829 .  i - the indices into j for the start of each local row (starts with zero)
3830 .  j - the column indices for each local row (starts with zero)
3831 -  v - optional values in the matrix
3832 
3833    Level: developer
3834 
3835    Notes:
3836        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3837      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3838      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3839 
3840        The i and j indices are 0 based; the i entries are offsets into the local j array.
3841 
3842        The format used for the sparse matrix input is equivalent to a
3843     row-major ordering, i.e., for the following matrix, the input data expected is
3844     as shown:
3845 
3846         1 0 0
3847         2 0 3     P0
3848        -------
3849         4 5 6     P1
3850 
3851      Process0 [P0]: rows_owned=[0,1]
3852         i =  {0,1,3}  [size = nrow+1  = 2+1]
3853         j =  {0,0,2}  [size = nz = 3]
3854         v =  {1,2,3}  [size = nz = 3]
3855 
3856      Process1 [P1]: rows_owned=[2]
3857         i =  {0,3}    [size = nrow+1  = 1+1]
3858         j =  {0,1,2}  [size = nz = 3]
3859         v =  {4,5,6}  [size = nz = 3]
3860 
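     As a minimal sketch (not from the PETSc examples; assumes B has already been
     created with MatCreate()/MatSetSizes()/MatSetType(MATMPIAIJ); error checking
     abbreviated), process 0 above would call:

.vb
     PetscInt    i[] = {0,1,3};          /* row offsets for the 2 local rows */
     PetscInt    j[] = {0,0,2};          /* global column indices            */
     PetscScalar v[] = {1.0,2.0,3.0};    /* values, row by row               */
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve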
3861 .keywords: matrix, aij, compressed row, sparse, parallel
3862 
3863 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3864           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3865 @*/
3866 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3867 {
3868   PetscErrorCode ierr;
3869 
3870   PetscFunctionBegin;
3871   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3872   PetscFunctionReturn(0);
3873 }
3874 
3875 #undef __FUNCT__
3876 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3877 /*@C
3878    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3879    (the default parallel PETSc format).  For good matrix assembly performance
3880    the user should preallocate the matrix storage by setting the parameters
3881    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3882    performance can be increased by more than a factor of 50.
3883 
3884    Collective on MPI_Comm
3885 
3886    Input Parameters:
3887 +  A - the matrix
3888 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3889            (same value is used for all local rows)
3890 .  d_nnz - array containing the number of nonzeros in the various rows of the
3891            DIAGONAL portion of the local submatrix (possibly different for each row)
3892            or NULL, if d_nz is used to specify the nonzero structure.
3893            The size of this array is equal to the number of local rows, i.e 'm'.
3894            For matrices that will be factored, you must leave room for (and set)
3895            the diagonal entry even if it is zero.
3896 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3897            submatrix (same value is used for all local rows).
3898 -  o_nnz - array containing the number of nonzeros in the various rows of the
3899            OFF-DIAGONAL portion of the local submatrix (possibly different for
3900            each row) or NULL, if o_nz is used to specify the nonzero
3901            structure. The size of this array is equal to the number
3902            of local rows, i.e 'm'.
3903 
3904    If the *_nnz parameter is given then the *_nz parameter is ignored
3905 
3906    The AIJ format (also called the Yale sparse matrix format or
3907    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3908    storage.  The stored row and column indices begin with zero.
3909    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3910 
3911    The parallel matrix is partitioned such that the first m0 rows belong to
3912    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3913    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
3914 
3915    The DIAGONAL portion of the local submatrix of a processor can be defined
3916    as the submatrix which is obtained by extracting the part corresponding to
3917    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3918    first row that belongs to the processor, r2 is the last row belonging to
3919    this processor, and c1-c2 is the range of indices of the local part of a
3920    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3921    common case of a square matrix, the row and column ranges are the same and
3922    the DIAGONAL part is also square. The remaining portion of the local
3923    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3924 
3925    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3926 
3927    You can call MatGetInfo() to get information on how effective the preallocation was;
3928    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3929    You can also run with the option -info and look for messages with the string
3930    malloc in them to see if additional memory allocation was needed.
3931 
3932    Example usage:
3933 
3934    Consider the following 8x8 matrix with 34 non-zero values, that is
3935    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3936    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3937    as follows:
3938 
3939 .vb
3940             1  2  0  |  0  3  0  |  0  4
3941     Proc0   0  5  6  |  7  0  0  |  8  0
3942             9  0 10  | 11  0  0  | 12  0
3943     -------------------------------------
3944            13  0 14  | 15 16 17  |  0  0
3945     Proc1   0 18  0  | 19 20 21  |  0  0
3946             0  0  0  | 22 23  0  | 24  0
3947     -------------------------------------
3948     Proc2  25 26 27  |  0  0 28  | 29  0
3949            30  0  0  | 31 32 33  |  0 34
3950 .ve
3951 
3952    This can be represented as a collection of submatrices as:
3953 
3954 .vb
3955       A B C
3956       D E F
3957       G H I
3958 .ve
3959 
3960    Where the submatrices A,B,C are owned by proc0, D,E,F are
3961    owned by proc1, G,H,I are owned by proc2.
3962 
3963    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3964    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3965    The 'M','N' parameters are 8,8, and have the same values on all procs.
3966 
3967    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3968    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3969    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3970    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3971    part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
3972    matrix and [DF] as another SeqAIJ matrix.
3973 
3974    When d_nz, o_nz parameters are specified, d_nz storage elements are
3975    allocated for every row of the local diagonal submatrix, and o_nz
3976    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3977    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3978    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
3979    In this case, the values of d_nz,o_nz are:
3980 .vb
3981      proc0 : d_nz = 2, o_nz = 2
3982      proc1 : d_nz = 3, o_nz = 2
3983      proc2 : d_nz = 1, o_nz = 4
3984 .ve
3985    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3986    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3987    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3988    34 values.
3989 
3990    When d_nnz, o_nnz parameters are specified, the storage is specified
3991    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3992    In the above case the values for d_nnz,o_nnz are:
3993 .vb
3994      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3995      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3996      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3997 .ve
3998    Here the space allocated is the sum of all the above values, i.e. 34, and
3999    hence the preallocation is perfect.
4000 
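   As a minimal sketch (not from the PETSc examples; assumes B is the 8x8 matrix
   above with type MATMPIAIJ and 3 local rows on proc0; error checking abbreviated),
   proc0 would call:

.vb
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};   /* the proc0 values listed above */
     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve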
4001    Level: intermediate
4002 
4003 .keywords: matrix, aij, compressed row, sparse, parallel
4004 
4005 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4006           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4007 @*/
4008 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4009 {
4010   PetscErrorCode ierr;
4011 
4012   PetscFunctionBegin;
4013   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4014   PetscValidType(B,1);
4015   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4016   PetscFunctionReturn(0);
4017 }
4018 
4019 #undef __FUNCT__
4020 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4021 /*@
4022      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4023          CSR format the local rows.
4024 
4025    Collective on MPI_Comm
4026 
4027    Input Parameters:
4028 +  comm - MPI communicator
4029 .  m - number of local rows (Cannot be PETSC_DECIDE)
4030 .  n - This value should be the same as the local size used in creating the
4031        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4032        calculated if N is given). For square matrices n is almost always m.
4033 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4034 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4035 .   i - row indices
4036 .   j - column indices
4037 -   a - matrix values
4038 
4039    Output Parameter:
4040 .   mat - the matrix
4041 
4042    Level: intermediate
4043 
4044    Notes:
4045        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4046      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4047      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4048 
4049        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4050 
4051        The format used for the sparse matrix input is equivalent to a
4052     row-major ordering, i.e. for the following matrix, the input data expected is
4053     as shown:
4054 
4055         1 0 0
4056         2 0 3     P0
4057        -------
4058         4 5 6     P1
4059 
4060      Process0 [P0]: rows_owned=[0,1]
4061         i =  {0,1,3}  [size = nrow+1  = 2+1]
4062         j =  {0,0,2}  [size = nz = 3]
4063         v =  {1,2,3}  [size = nz = 3]
4064 
4065      Process1 [P1]: rows_owned=[2]
4066         i =  {0,3}    [size = nrow+1  = 1+1]
4067         j =  {0,1,2}  [size = nz = 3]
4068         v =  {4,5,6}  [size = nz = 3]
4069 
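     As a minimal sketch (not from the PETSc examples; error checking abbreviated),
     process 0 above would create the 3x3 example matrix with:

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3},j[] = {0,0,2};
     PetscScalar a[] = {1.0,2.0,3.0};
     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,a,&A);CHKERRQ(ierr);
.ve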
4070 .keywords: matrix, aij, compressed row, sparse, parallel
4071 
4072 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4073           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4074 @*/
4075 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4076 {
4077   PetscErrorCode ierr;
4078 
4079   PetscFunctionBegin;
4080   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4081   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4082   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4083   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4084   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4085   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4086   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4087   PetscFunctionReturn(0);
4088 }
4089 
4090 #undef __FUNCT__
4091 #define __FUNCT__ "MatCreateAIJ"
4092 /*@C
4093    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4094    (the default parallel PETSc format).  For good matrix assembly performance
4095    the user should preallocate the matrix storage by setting the parameters
4096    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4097    performance can be increased by more than a factor of 50.
4098 
4099    Collective on MPI_Comm
4100 
4101    Input Parameters:
4102 +  comm - MPI communicator
4103 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4104            This value should be the same as the local size used in creating the
4105            y vector for the matrix-vector product y = Ax.
4106 .  n - This value should be the same as the local size used in creating the
4107        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4108        calculated if N is given). For square matrices n is almost always m.
4109 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4110 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4111 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4112            (same value is used for all local rows)
4113 .  d_nnz - array containing the number of nonzeros in the various rows of the
4114            DIAGONAL portion of the local submatrix (possibly different for each row)
4115            or NULL, if d_nz is used to specify the nonzero structure.
4116            The size of this array is equal to the number of local rows, i.e 'm'.
4117 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4118            submatrix (same value is used for all local rows).
4119 -  o_nnz - array containing the number of nonzeros in the various rows of the
4120            OFF-DIAGONAL portion of the local submatrix (possibly different for
4121            each row) or NULL, if o_nz is used to specify the nonzero
4122            structure. The size of this array is equal to the number
4123            of local rows, i.e 'm'.
4124 
4125    Output Parameter:
4126 .  A - the matrix
4127 
4128    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4129    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4130    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4131 
4132    Notes:
4133    If the *_nnz parameter is given then the *_nz parameter is ignored
4134 
4135    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4136    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4137    storage requirements for this matrix.
4138 
4139    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4140    processor then it must be used on all processors that share the object for
4141    that argument.
4142 
4143    The user MUST specify either the local or global matrix dimensions
4144    (possibly both).
4145 
4146    The parallel matrix is partitioned across processors such that the
4147    first m0 rows belong to process 0, the next m1 rows belong to
4148    process 1, the next m2 rows belong to process 2, etc., where
4149    m0,m1,m2,... are given by the input parameter 'm', i.e. each processor stores
4150    values corresponding to an [m x N] submatrix.
4151 
4152    The columns are logically partitioned with the n0 columns belonging
4153    to the 0th partition, the next n1 columns belonging to the next
4154    partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.
4155 
4156    The DIAGONAL portion of the local submatrix on any given processor
4157    is the submatrix corresponding to the rows and columns m,n
4158    corresponding to the given processor, i.e. the diagonal matrix on
4159    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4160    etc. The remaining portion of the local submatrix [m x (N-n)]
4161    constitutes the OFF-DIAGONAL portion. The example below better
4162    illustrates this concept.
4163 
4164    For a square global matrix we define each processor's diagonal portion
4165    to be its local rows and the corresponding columns (a square submatrix);
4166    each processor's off-diagonal portion encompasses the remainder of the
4167    local matrix (a rectangular submatrix).
4168 
4169    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4170 
4171    When calling this routine with a single process communicator, a matrix of
4172    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4173    type of communicator, use the construction mechanism:
4174      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4175 
4176    By default, this format uses inodes (identical nodes) when possible.
4177    We search for consecutive rows with the same nonzero structure, thereby
4178    reusing matrix information to achieve increased efficiency.
4179 
4180    Options Database Keys:
4181 +  -mat_no_inode  - Do not use inodes
4182 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4183 -  -mat_aij_oneindex - Internally use indexing starting at 1
4184         rather than 0.  Note that when calling MatSetValues(),
4185         the user still MUST index entries starting at 0!
4186 
4187 
4188    Example usage:
4189 
4190    Consider the following 8x8 matrix with 34 non-zero values, that is
4191    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4192    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4193    as follows:
4194 
4195 .vb
4196             1  2  0  |  0  3  0  |  0  4
4197     Proc0   0  5  6  |  7  0  0  |  8  0
4198             9  0 10  | 11  0  0  | 12  0
4199     -------------------------------------
4200            13  0 14  | 15 16 17  |  0  0
4201     Proc1   0 18  0  | 19 20 21  |  0  0
4202             0  0  0  | 22 23  0  | 24  0
4203     -------------------------------------
4204     Proc2  25 26 27  |  0  0 28  | 29  0
4205            30  0  0  | 31 32 33  |  0 34
4206 .ve
4207 
4208    This can be represented as a collection of submatrices as:
4209 
4210 .vb
4211       A B C
4212       D E F
4213       G H I
4214 .ve
4215 
4216    Where the submatrices A,B,C are owned by proc0, D,E,F are
4217    owned by proc1, G,H,I are owned by proc2.
4218 
4219    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4220    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4221    The 'M','N' parameters are 8,8, and have the same values on all procs.
4222 
4223    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4224    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4225    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4226    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4227    part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
4228    matrix and [DF] as another SeqAIJ matrix.
4229 
4230    When d_nz, o_nz parameters are specified, d_nz storage elements are
4231    allocated for every row of the local diagonal submatrix, and o_nz
4232    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4233    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4234    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4235    In this case, the values of d_nz,o_nz are:
4236 .vb
4237      proc0 : d_nz = 2, o_nz = 2
4238      proc1 : d_nz = 3, o_nz = 2
4239      proc2 : d_nz = 1, o_nz = 4
4240 .ve
4241    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4242    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4243    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4244    34 values.
4245 
4246    When d_nnz, o_nnz parameters are specified, the storage is specified
4247    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4248    In the above case the values for d_nnz,o_nnz are:
4249 .vb
4250      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4251      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4252      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4253 .ve
4254    Here the space allocated is the sum of all the above values, i.e. 34, and
4255    hence the preallocation is perfect.
4256 
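   As a minimal sketch (not from the PETSc examples; error checking abbreviated),
   proc0 of the example above would create the 8x8 matrix with:

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};   /* proc0 values from above */
     ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     /* ... MatSetValues() on the locally owned rows, then assemble ... */
.ve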
4257    Level: intermediate
4258 
4259 .keywords: matrix, aij, compressed row, sparse, parallel
4260 
4261 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4262           MPIAIJ, MatCreateMPIAIJWithArrays()
4263 @*/
4264 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4265 {
4266   PetscErrorCode ierr;
4267   PetscMPIInt    size;
4268 
4269   PetscFunctionBegin;
4270   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4271   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4272   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4273   if (size > 1) {
4274     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4275     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4276   } else {
4277     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4278     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4279   }
4280   PetscFunctionReturn(0);
4281 }
4282 
4283 #undef __FUNCT__
4284 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
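/* Returns (without copying) the diagonal block Ad and off-diagonal block Ao of an MPIAIJ matrix,
   plus colmap, the mapping from Ao's local column numbering to global column indices */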
4285 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4286 {
4287   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4288 
4289   PetscFunctionBegin;
4290   *Ad     = a->A;
4291   *Ao     = a->B;
4292   *colmap = a->garray;
4293   PetscFunctionReturn(0);
4294 }
4295 
4296 #undef __FUNCT__
4297 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4298 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4299 {
4300   PetscErrorCode ierr;
4301   PetscInt       i;
4302   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4303 
4304   PetscFunctionBegin;
4305   if (coloring->ctype == IS_COLORING_GLOBAL) {
4306     ISColoringValue *allcolors,*colors;
4307     ISColoring      ocoloring;
4308 
4309     /* set coloring for diagonal portion */
4310     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4311 
4312     /* set coloring for off-diagonal portion */
4313     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4314     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4315     for (i=0; i<a->B->cmap->n; i++) {
4316       colors[i] = allcolors[a->garray[i]];
4317     }
4318     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4319     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4320     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4321     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4322   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4323     ISColoringValue *colors;
4324     PetscInt        *larray;
4325     ISColoring      ocoloring;
4326 
4327     /* set coloring for diagonal portion */
4328     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4329     for (i=0; i<a->A->cmap->n; i++) {
4330       larray[i] = i + A->cmap->rstart;
4331     }
4332     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4333     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4334     for (i=0; i<a->A->cmap->n; i++) {
4335       colors[i] = coloring->colors[larray[i]];
4336     }
4337     ierr = PetscFree(larray);CHKERRQ(ierr);
4338     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4339     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4340     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4341 
4342     /* set coloring for off-diagonal portion */
4343     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4344     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4345     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4346     for (i=0; i<a->B->cmap->n; i++) {
4347       colors[i] = coloring->colors[larray[i]];
4348     }
4349     ierr = PetscFree(larray);CHKERRQ(ierr);
4350     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4351     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4352     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4353   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4354   PetscFunctionReturn(0);
4355 }
4356 
4357 #undef __FUNCT__
4358 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4359 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4360 {
4361   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4362   PetscErrorCode ierr;
4363 
4364   PetscFunctionBegin;
4365   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4366   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4367   PetscFunctionReturn(0);
4368 }
4369 
4370 #undef __FUNCT__
4371 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4372 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4373 {
4374   PetscErrorCode ierr;
4375   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4376   PetscInt       *indx;
4377 
4378   PetscFunctionBegin;
4379   /* This routine will ONLY return MPIAIJ type matrix */
4380   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4381   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4382   if (n == PETSC_DECIDE) {
4383     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4384   }
4385   /* Check sum(n) = N */
4386   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4387   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",N);
4388 
4389   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4390   rstart -= m;
4391 
4392   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4393   for (i=0; i<m; i++) {
4394     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4395     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4396     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4397   }
4398 
4399   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4400   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4401   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4402   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4403   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4404   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4405   PetscFunctionReturn(0);
4406 }
4407 
4408 #undef __FUNCT__
4409 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4410 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4411 {
4412   PetscErrorCode ierr;
4413   PetscInt       m,N,i,rstart,nnz,Ii;
4414   PetscInt       *indx;
4415   PetscScalar    *values;
4416 
4417   PetscFunctionBegin;
4418   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4419   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4420   for (i=0; i<m; i++) {
4421     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4422     Ii   = i + rstart;
4423     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4424     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4425   }
4426   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4427   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4428   PetscFunctionReturn(0);
4429 }
4430 
4431 #undef __FUNCT__
4432 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4433 /*@
4434       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4435                  matrices from each processor
4436 
4437     Collective on MPI_Comm
4438 
4439    Input Parameters:
4440 +    comm - the communicator the parallel matrix will live on
4441 .    inmat - the input sequential matrix (one per process)
4442 .    n - number of local columns (or PETSC_DECIDE)
4443 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4444 
4445    Output Parameter:
4446 .    outmat - the parallel matrix generated
4447 
4448     Level: advanced
4449 
4450    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4451 
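   A minimal usage sketch (not from the PETSc examples; assumes each process holds
   a SeqAIJ matrix Aloc, all with the same number of columns; error checking abbreviated):

.vb
     Mat C;
     ierr = MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,Aloc,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve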
4452 @*/
4453 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4454 {
4455   PetscErrorCode ierr;
4456 
4457   PetscFunctionBegin;
4458   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4459   if (scall == MAT_INITIAL_MATRIX) {
4460     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4461   }
4462   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4463   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4464   PetscFunctionReturn(0);
4465 }
4466 
4467 #undef __FUNCT__
4468 #define __FUNCT__ "MatFileSplit"
4469 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4470 {
4471   PetscErrorCode    ierr;
4472   PetscMPIInt       rank;
4473   PetscInt          m,N,i,rstart,nnz;
4474   size_t            len;
4475   const PetscInt    *indx;
4476   PetscViewer       out;
4477   char              *name;
4478   Mat               B;
4479   const PetscScalar *values;
4480 
4481   PetscFunctionBegin;
4482   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4483   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4484   /* Should this be the type of the diagonal block of A? */
4485   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4486   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4487   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4488   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4489   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4490   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4491   for (i=0; i<m; i++) {
4492     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4493     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4494     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4495   }
4496   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4497   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4498 
4499   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4500   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4501   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4502   sprintf(name,"%s.%d",outfile,rank);
4503   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4504   ierr = PetscFree(name);CHKERRQ(ierr);
4505   ierr = MatView(B,out);CHKERRQ(ierr);
4506   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4507   ierr = MatDestroy(&B);CHKERRQ(ierr);
4508   PetscFunctionReturn(0);
4509 }
4510 
4511 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4512 #undef __FUNCT__
4513 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4514 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4515 {
4516   PetscErrorCode      ierr;
4517   Mat_Merge_SeqsToMPI *merge;
4518   PetscContainer      container;
4519 
4520   PetscFunctionBegin;
4521   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4522   if (container) {
4523     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4524     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4525     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4526     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4527     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4528     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4529     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4530     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4531     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4532     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4533     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4534     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4535     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4536     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4537     ierr = PetscFree(merge);CHKERRQ(ierr);
4538     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4539   }
4540   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4541   PetscFunctionReturn(0);
4542 }
4543 
4544 #include <../src/mat/utils/freespace.h>
4545 #include <petscbt.h>
4546 
4547 #undef __FUNCT__
4548 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4549 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4550 {
4551   PetscErrorCode      ierr;
4552   MPI_Comm            comm;
4553   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4554   PetscMPIInt         size,rank,taga,*len_s;
4555   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4556   PetscInt            proc,m;
4557   PetscInt            **buf_ri,**buf_rj;
4558   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4559   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4560   MPI_Request         *s_waits,*r_waits;
4561   MPI_Status          *status;
4562   MatScalar           *aa=a->a;
4563   MatScalar           **abuf_r,*ba_i;
4564   Mat_Merge_SeqsToMPI *merge;
4565   PetscContainer      container;
4566 
4567   PetscFunctionBegin;
4568   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4569   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4570 
4571   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4572   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4573 
4574   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4575   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4576 
4577   bi     = merge->bi;
4578   bj     = merge->bj;
4579   buf_ri = merge->buf_ri;
4580   buf_rj = merge->buf_rj;
4581 
4582   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4583   owners = merge->rowmap->range;
4584   len_s  = merge->len_s;
4585 
4586   /* send and recv matrix values */
4587   /*-----------------------------*/
4588   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4589   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4590 
4591   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4592   for (proc=0,k=0; proc<size; proc++) {
4593     if (!len_s[proc]) continue;
4594     i    = owners[proc];
4595     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4596     k++;
4597   }
4598 
4599   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4600   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4601   ierr = PetscFree(status);CHKERRQ(ierr);
4602 
4603   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4604   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4605 
4606   /* insert mat values of mpimat */
4607   /*----------------------------*/
4608   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4609   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4610 
4611   for (k=0; k<merge->nrecv; k++) {
4612     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4613     nrows       = *(buf_ri_k[k]);
4614     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of the k-th received i-structure */
4615     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4616   }
4617 
4618   /* set values of ba */
4619   m = merge->rowmap->n;
4620   for (i=0; i<m; i++) {
4621     arow = owners[rank] + i;
4622     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4623     bnzi = bi[i+1] - bi[i];
4624     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4625 
4626     /* add local non-zero vals of this proc's seqmat into ba */
4627     anzi   = ai[arow+1] - ai[arow];
4628     aj     = a->j + ai[arow];
4629     aa     = a->a + ai[arow];
4630     nextaj = 0;
4631     for (j=0; nextaj<anzi; j++) {
4632       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4633         ba_i[j] += aa[nextaj++];
4634       }
4635     }
4636 
4637     /* add received vals into ba */
4638     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4639       /* i-th row */
4640       if (i == *nextrow[k]) {
4641         anzi   = *(nextai[k]+1) - *nextai[k];
4642         aj     = buf_rj[k] + *(nextai[k]);
4643         aa     = abuf_r[k] + *(nextai[k]);
4644         nextaj = 0;
4645         for (j=0; nextaj<anzi; j++) {
4646           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4647             ba_i[j] += aa[nextaj++];
4648           }
4649         }
4650         nextrow[k]++; nextai[k]++;
4651       }
4652     }
4653     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4654   }
4655   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4656   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4657 
4658   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4659   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4660   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4661   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4662   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4663   PetscFunctionReturn(0);
4664 }
4665 
4666 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4667 
4668 #undef __FUNCT__
4669 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4670 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4671 {
4672   PetscErrorCode      ierr;
4673   Mat                 B_mpi;
4674   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4675   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4676   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4677   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4678   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4679   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4680   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4681   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4682   MPI_Status          *status;
4683   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4684   PetscBT             lnkbt;
4685   Mat_Merge_SeqsToMPI *merge;
4686   PetscContainer      container;
4687 
4688   PetscFunctionBegin;
4689   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4690 
4691   /* make sure it is a PETSc comm */
4692   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4693   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4694   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4695 
4696   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4697   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4698 
4699   /* determine row ownership */
4700   /*---------------------------------------------------------*/
4701   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4702   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4703   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4704   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4705   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4706   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4707   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4708 
4709   m      = merge->rowmap->n;
4710   owners = merge->rowmap->range;
4711 
4712   /* determine the number of messages to send, their lengths */
4713   /*---------------------------------------------------------*/
4714   len_s = merge->len_s;
4715 
4716   len          = 0; /* length of buf_si[] */
4717   merge->nsend = 0;
4718   for (proc=0; proc<size; proc++) {
4719     len_si[proc] = 0;
4720     if (proc == rank) {
4721       len_s[proc] = 0;
4722     } else {
4723       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4724       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros to be sent to [proc] */
4725     }
4726     if (len_s[proc]) {
4727       merge->nsend++;
4728       nrows = 0;
4729       for (i=owners[proc]; i<owners[proc+1]; i++) {
4730         if (ai[i+1] > ai[i]) nrows++;
4731       }
4732       len_si[proc] = 2*(nrows+1);
4733       len         += len_si[proc];
4734     }
4735   }
4736 
4737   /* determine the number and length of messages to receive for ij-structure */
4738   /*-------------------------------------------------------------------------*/
4739   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4740   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4741 
4742   /* post the Irecv of j-structure */
4743   /*-------------------------------*/
4744   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4745   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4746 
4747   /* post the Isend of j-structure */
4748   /*--------------------------------*/
4749   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4750 
4751   for (proc=0, k=0; proc<size; proc++) {
4752     if (!len_s[proc]) continue;
4753     i    = owners[proc];
4754     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4755     k++;
4756   }
4757 
4758   /* receives and sends of j-structure are complete */
4759   /*------------------------------------------------*/
4760   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4761   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4762 
4763   /* send and recv i-structure */
4764   /*---------------------------*/
4765   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4766   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4767 
4768   ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4769   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4770   for (proc=0,k=0; proc<size; proc++) {
4771     if (!len_s[proc]) continue;
4772     /* form outgoing message for i-structure:
4773          buf_si[0]:                 nrows to be sent
4774                [1:nrows]:           row index (global)
4775                [nrows+1:2*nrows+1]: i-structure index
4776     */
4777     /*-------------------------------------------*/
4778     nrows       = len_si[proc]/2 - 1;
4779     buf_si_i    = buf_si + nrows+1;
4780     buf_si[0]   = nrows;
4781     buf_si_i[0] = 0;
4782     nrows       = 0;
4783     for (i=owners[proc]; i<owners[proc+1]; i++) {
4784       anzi = ai[i+1] - ai[i];
4785       if (anzi) {
4786         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4787         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4788         nrows++;
4789       }
4790     }
4791     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4792     k++;
4793     buf_si += len_si[proc];
4794   }
4795 
4796   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4797   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4798 
4799   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4800   for (i=0; i<merge->nrecv; i++) {
4801     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4802   }
4803 
4804   ierr = PetscFree(len_si);CHKERRQ(ierr);
4805   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4806   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4807   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4808   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4809   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4810   ierr = PetscFree(status);CHKERRQ(ierr);
4811 
4812   /* compute a local seq matrix in each processor */
4813   /*----------------------------------------------*/
4814   /* allocate bi array and free space for accumulating nonzero column info */
4815   ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4816   bi[0] = 0;
4817 
4818   /* create and initialize a linked list */
4819   nlnk = N+1;
4820   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4821 
4822   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4823   len  = ai[owners[rank+1]] - ai[owners[rank]];
4824   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4825 
4826   current_space = free_space;
4827 
4828   /* determine symbolic info for each local row */
4829   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4830 
4831   for (k=0; k<merge->nrecv; k++) {
4832     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4833     nrows       = *buf_ri_k[k];
4834     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of the k-th received i-structure */
4835     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4836   }
4837 
4838   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4839   len  = 0;
4840   for (i=0; i<m; i++) {
4841     bnzi = 0;
4842     /* add local non-zero cols of this proc's seqmat into lnk */
4843     arow  = owners[rank] + i;
4844     anzi  = ai[arow+1] - ai[arow];
4845     aj    = a->j + ai[arow];
4846     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4847     bnzi += nlnk;
4848     /* add received col data into lnk */
4849     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4850       if (i == *nextrow[k]) { /* i-th row */
4851         anzi  = *(nextai[k]+1) - *nextai[k];
4852         aj    = buf_rj[k] + *nextai[k];
4853         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4854         bnzi += nlnk;
4855         nextrow[k]++; nextai[k]++;
4856       }
4857     }
4858     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4859 
4860     /* if free space is not available, make more free space */
4861     if (current_space->local_remaining<bnzi) {
4862       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4863       nspacedouble++;
4864     }
4865     /* copy data into free space, then initialize lnk */
4866     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4867     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4868 
4869     current_space->array           += bnzi;
4870     current_space->local_used      += bnzi;
4871     current_space->local_remaining -= bnzi;
4872 
4873     bi[i+1] = bi[i] + bnzi;
4874   }
4875 
4876   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4877 
4878   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4879   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4880   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4881 
4882   /* create symbolic parallel matrix B_mpi */
4883   /*---------------------------------------*/
4884   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4885   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4886   if (n==PETSC_DECIDE) {
4887     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4888   } else {
4889     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4890   }
4891   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4892   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4893   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4894   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4895   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4896 
4897   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4898   B_mpi->assembled    = PETSC_FALSE;
4899   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4900   merge->bi           = bi;
4901   merge->bj           = bj;
4902   merge->buf_ri       = buf_ri;
4903   merge->buf_rj       = buf_rj;
4904   merge->coi          = NULL;
4905   merge->coj          = NULL;
4906   merge->owners_co    = NULL;
4907 
4908   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4909 
4910   /* attach the supporting struct to B_mpi for reuse */
4911   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4912   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4913   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4914   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4915   *mpimat = B_mpi;
4916 
4917   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4918   PetscFunctionReturn(0);
4919 }
4920 
4921 #undef __FUNCT__
4922 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4923 /*@C
4924       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4925                  matrices from each processor
4926 
4927     Collective on MPI_Comm
4928 
4929    Input Parameters:
4930 +    comm - the communicator the parallel matrix will live on
4931 .    seqmat - the input sequential matrix (one per process)
4932 .    m - number of local rows (or PETSC_DECIDE)
4933 .    n - number of local columns (or PETSC_DECIDE)
4934 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4935 
4936    Output Parameter:
4937 .    mpimat - the parallel matrix generated
4938 
4939     Level: advanced
4940 
4941    Notes:
4942      The dimensions of the sequential matrix in each processor MUST be the same.
4943      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4944      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
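
     A minimal usage sketch (not from the PETSc examples; assumes each process holds
     a SeqAIJ matrix Aloc of identical global dimensions; error checking abbreviated):

.vb
     Mat C;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aloc,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve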
4945 @*/
4946 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4947 {
4948   PetscErrorCode ierr;
4949   PetscMPIInt    size;
4950 
4951   PetscFunctionBegin;
4952   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4953   if (size == 1) {
4954     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4955     if (scall == MAT_INITIAL_MATRIX) {
4956       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4957     } else {
4958       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4959     }
4960     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4961     PetscFunctionReturn(0);
4962   }
4963   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4964   if (scall == MAT_INITIAL_MATRIX) {
4965     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4966   }
4967   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4968   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4969   PetscFunctionReturn(0);
4970 }
4971 
4972 #undef __FUNCT__
4973 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4974 /*@
4975      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4976           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4977           with MatGetSize()
4978 
4979     Not Collective
4980 
4981    Input Parameters:
4982 +    A - the matrix
4983 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4984 
4985    Output Parameter:
4986 .    A_loc - the local sequential matrix generated
4987 
4988     Level: developer
4989 
4990 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4991 
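   A minimal usage sketch (not from the PETSc examples; error checking abbreviated):

.vb
     Mat Aloc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     /* ... use Aloc as an ordinary SeqAIJ matrix ... */
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
.ve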
4992 @*/
4993 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4994 {
4995   PetscErrorCode ierr;
4996   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4997   Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4998   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4999   MatScalar      *aa=a->a,*ba=b->a,*cam;
5000   PetscScalar    *ca;
5001   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5002   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5003   PetscBool      match;
5004 
5005   PetscFunctionBegin;
5006   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5007   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5008   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5009   if (scall == MAT_INITIAL_MATRIX) {
5010     ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5011     ci[0] = 0;
5012     for (i=0; i<am; i++) {
5013       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5014     }
5015     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5016     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5017     k    = 0;
5018     for (i=0; i<am; i++) {
5019       ncols_o = bi[i+1] - bi[i];
5020       ncols_d = ai[i+1] - ai[i];
5021       /* off-diagonal portion of A */
5022       for (jo=0; jo<ncols_o; jo++) {
5023         col = cmap[*bj];
5024         if (col >= cstart) break;
5025         cj[k]   = col; bj++;
5026         ca[k++] = *ba++;
5027       }
5028       /* diagonal portion of A */
5029       for (j=0; j<ncols_d; j++) {
5030         cj[k]   = cstart + *aj++;
5031         ca[k++] = *aa++;
5032       }
5033       /* off-diagonal portion of A */
5034       for (j=jo; j<ncols_o; j++) {
5035         cj[k]   = cmap[*bj++];
5036         ca[k++] = *ba++;
5037       }
5038     }
5039     /* put together the new matrix */
5040     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5041     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5042     /* Since these are PETSc arrays, change flags to free them as necessary. */
5043     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5044     mat->free_a  = PETSC_TRUE;
5045     mat->free_ij = PETSC_TRUE;
5046     mat->nonew   = 0;
5047   } else if (scall == MAT_REUSE_MATRIX) {
5048     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5049     ci = mat->i; cj = mat->j; cam = mat->a;
5050     for (i=0; i<am; i++) {
5051       /* off-diagonal portion of A */
5052       ncols_o = bi[i+1] - bi[i];
5053       for (jo=0; jo<ncols_o; jo++) {
5054         col = cmap[*bj];
5055         if (col >= cstart) break;
5056         *cam++ = *ba++; bj++;
5057       }
5058       /* diagonal portion of A */
5059       ncols_d = ai[i+1] - ai[i];
5060       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5061       /* off-diagonal portion of A */
5062       for (j=jo; j<ncols_o; j++) {
5063         *cam++ = *ba++; bj++;
5064       }
5065     }
5066   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5067   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5068   PetscFunctionReturn(0);
5069 }
5070 
5071 #undef __FUNCT__
5072 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5073 /*@C
5074      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5075 
5076     Not Collective
5077 
5078    Input Parameters:
5079 +    A - the matrix
5080 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5081 -    row, col - index sets of rows and columns to extract (or NULL)
5082 
5083    Output Parameter:
5084 .    A_loc - the local sequential matrix generated
5085 
5086     Level: developer
5087 
5088 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5089 
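   A minimal usage sketch (not from the PETSc examples; passing NULL for row and col
   selects all local rows and the nonzero columns; error checking abbreviated):

.vb
     Mat Aloc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);CHKERRQ(ierr);
.ve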
5090 @*/
5091 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5092 {
5093   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5094   PetscErrorCode ierr;
5095   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5096   IS             isrowa,iscola;
5097   Mat            *aloc;
5098   PetscBool      match;
5099 
5100   PetscFunctionBegin;
5101   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5102   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5103   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5104   if (!row) {
5105     start = A->rmap->rstart; end = A->rmap->rend;
5106     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5107   } else {
5108     isrowa = *row;
5109   }
5110   if (!col) {
5111     start = A->cmap->rstart;
5112     cmap  = a->garray;
5113     nzA   = a->A->cmap->n;
5114     nzB   = a->B->cmap->n;
5115     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5116     ncols = 0;
5117     for (i=0; i<nzB; i++) {
5118       if (cmap[i] < start) idx[ncols++] = cmap[i];
5119       else break;
5120     }
5121     imark = i;
5122     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5123     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5124     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5125   } else {
5126     iscola = *col;
5127   }
5128   if (scall != MAT_INITIAL_MATRIX) {
5129     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5130     aloc[0] = *A_loc;
5131   }
5132   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5133   *A_loc = aloc[0];
5134   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5135   if (!row) {
5136     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5137   }
5138   if (!col) {
5139     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5140   }
5141   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5142   PetscFunctionReturn(0);
5143 }
5144 
5145 #undef __FUNCT__
5146 #define __FUNCT__ "MatGetBrowsOfAcols"
5147 /*@C
5148     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5149 
5150     Collective on Mat
5151 
5152    Input Parameters:
5153 +    A,B - the matrices in mpiaij format
5154 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5155 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5156 
5157    Output Parameter:
5158 +    rowb, colb - index sets of rows and columns of B to extract (created if NULL was passed on input)
5159 -    B_seq - the sequential matrix generated
5160 
5161     Level: developer
5162 
5163 @*/
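/* Example usage (a sketch, assuming A and B are assembled MPIAIJ matrices with compatible layouts):

     IS  rowb = NULL,colb = NULL;
     Mat B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ... later, after the values (but not the structure) of A and B change ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/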
5164 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5165 {
5166   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5167   PetscErrorCode ierr;
5168   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5169   IS             isrowb,iscolb;
5170   Mat            *bseq=NULL;
5171 
5172   PetscFunctionBegin;
5173   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5174     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5175   }
5176   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5177 
5178   if (scall == MAT_INITIAL_MATRIX) {
5179     start = A->cmap->rstart;
5180     cmap  = a->garray;
5181     nzA   = a->A->cmap->n;
5182     nzB   = a->B->cmap->n;
5183     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5184     ncols = 0;
5185     for (i=0; i<nzB; i++) {  /* row < local row index */
5186       if (cmap[i] < start) idx[ncols++] = cmap[i];
5187       else break;
5188     }
5189     imark = i;
5190     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5191     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5192     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5193     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5194   } else {
5195     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5196     isrowb  = *rowb; iscolb = *colb;
5197     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5198     bseq[0] = *B_seq;
5199   }
5200   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5201   *B_seq = bseq[0];
5202   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5203   if (!rowb) {
5204     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5205   } else {
5206     *rowb = isrowb;
5207   }
5208   if (!colb) {
5209     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5210   } else {
5211     *colb = iscolb;
5212   }
5213   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5214   PetscFunctionReturn(0);
5215 }
5216 
5217 #undef __FUNCT__
5218 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5219 /*
5220     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5221     of the OFF-DIAGONAL portion of local A
5222 
5223     Collective on Mat
5224 
5225    Input Parameters:
5226 +    A,B - the matrices in mpiaij format
5227 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5228 
5229    Output Parameter:
5230 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5231 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5232 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5233 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5234 
5235     Level: developer
5236 
5237 */
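/* Typical call pattern (a sketch; this is an internal routine, used e.g. by the parallel
   matrix-matrix product code, and the saved offsets/buffer make the reuse call cheaper):

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ... after the values (but not the structure) of B change ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
*/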
5238 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5239 {
5240   VecScatter_MPI_General *gen_to,*gen_from;
5241   PetscErrorCode         ierr;
5242   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5243   Mat_SeqAIJ             *b_oth;
5244   VecScatter             ctx =a->Mvctx;
5245   MPI_Comm               comm;
5246   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5247   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5248   PetscScalar            *rvalues,*svalues;
5249   MatScalar              *b_otha,*bufa,*bufA;
5250   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5251   MPI_Request            *rwaits = NULL,*swaits = NULL;
5252   MPI_Status             *sstatus,rstatus;
5253   PetscMPIInt            jj;
5254   PetscInt               *cols,sbs,rbs;
5255   PetscScalar            *vals;
5256 
5257   PetscFunctionBegin;
5258   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5259   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5260     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5261   }
5262   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5263   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5264 
5265   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5266   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5267   rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
5268   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
5269   nrecvs   = gen_from->n;
5270   nsends   = gen_to->n;
5271 
5272   ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5273   srow    = gen_to->indices;    /* local row index to be sent */
5274   sstarts = gen_to->starts;
5275   sprocs  = gen_to->procs;
5276   sstatus = gen_to->sstatus;
5277   sbs     = gen_to->bs;
5278   rstarts = gen_from->starts;
5279   rprocs  = gen_from->procs;
5280   rbs     = gen_from->bs;
5281 
5282   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5283   if (scall == MAT_INITIAL_MATRIX) {
5284     /* i-array */
5285     /*---------*/
5286     /*  post receives */
5287     for (i=0; i<nrecvs; i++) {
5288       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5289       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5290       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5291     }
5292 
5293     /* pack the outgoing message */
5294     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5295 
5296     sstartsj[0] = 0;
5297     rstartsj[0] = 0;
5298     len         = 0; /* total length of j or a array to be sent */
5299     k           = 0;
5300     for (i=0; i<nsends; i++) {
5301       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5302       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5303       for (j=0; j<nrows; j++) {
5304         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5305         for (l=0; l<sbs; l++) {
5306           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5307 
5308           rowlen[j*sbs+l] = ncols;
5309 
5310           len += ncols;
5311           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5312         }
5313         k++;
5314       }
5315       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5316 
5317       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5318     }
5319     /* recvs and sends of i-array are completed */
5320     i = nrecvs;
5321     while (i--) {
5322       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5323     }
5324     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5325 
5326     /* allocate buffers for sending j and a arrays */
5327     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5328     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5329 
5330     /* create i-array of B_oth */
5331     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5332 
5333     b_othi[0] = 0;
5334     len       = 0; /* total length of j or a array to be received */
5335     k         = 0;
5336     for (i=0; i<nrecvs; i++) {
5337       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5338       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5339       for (j=0; j<nrows; j++) {
5340         b_othi[k+1] = b_othi[k] + rowlen[j];
5341         len        += rowlen[j]; k++;
5342       }
5343       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5344     }
5345 
5346     /* allocate space for j and a arrays of B_oth */
5347     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5348     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5349 
5350     /* j-array */
5351     /*---------*/
5352     /*  post receives of j-array */
5353     for (i=0; i<nrecvs; i++) {
5354       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5355       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5356     }
5357 
5358     /* pack the outgoing message j-array */
5359     k = 0;
5360     for (i=0; i<nsends; i++) {
5361       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5362       bufJ  = bufj+sstartsj[i];
5363       for (j=0; j<nrows; j++) {
5364         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5365         for (ll=0; ll<sbs; ll++) {
5366           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5367           for (l=0; l<ncols; l++) {
5368             *bufJ++ = cols[l];
5369           }
5370           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5371         }
5372       }
5373       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5374     }
5375 
5376     /* recvs and sends of j-array are completed */
5377     i = nrecvs;
5378     while (i--) {
5379       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5380     }
5381     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5382   } else if (scall == MAT_REUSE_MATRIX) {
5383     sstartsj = *startsj_s;
5384     rstartsj = *startsj_r;
5385     bufa     = *bufa_ptr;
5386     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5387     b_otha   = b_oth->a;
5388   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5389 
5390   /* a-array */
5391   /*---------*/
5392   /*  post receives of a-array */
5393   for (i=0; i<nrecvs; i++) {
5394     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5395     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5396   }
5397 
5398   /* pack the outgoing message a-array */
5399   k = 0;
5400   for (i=0; i<nsends; i++) {
5401     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5402     bufA  = bufa+sstartsj[i];
5403     for (j=0; j<nrows; j++) {
5404       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5405       for (ll=0; ll<sbs; ll++) {
5406         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5407         for (l=0; l<ncols; l++) {
5408           *bufA++ = vals[l];
5409         }
5410         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5411       }
5412     }
5413     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5414   }
5415   /* recvs and sends of a-array are completed */
5416   i = nrecvs;
5417   while (i--) {
5418     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5419   }
5420   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5421   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5422 
5423   if (scall == MAT_INITIAL_MATRIX) {
5424     /* put together the new matrix */
5425     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5426 
5427     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5428     /* Since these are PETSc arrays, change flags to free them as necessary. */
5429     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5430     b_oth->free_a  = PETSC_TRUE;
5431     b_oth->free_ij = PETSC_TRUE;
5432     b_oth->nonew   = 0;
5433 
5434     ierr = PetscFree(bufj);CHKERRQ(ierr);
5435     if (!startsj_s || !bufa_ptr) {
5436       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5437       ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the send buffer itself, not the (possibly NULL) pointer argument */
5438     } else {
5439       *startsj_s = sstartsj;
5440       *startsj_r = rstartsj;
5441       *bufa_ptr  = bufa;
5442     }
5443   }
5444   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5445   PetscFunctionReturn(0);
5446 }
5447 
5448 #undef __FUNCT__
5449 #define __FUNCT__ "MatGetCommunicationStructs"
5450 /*@C
5451   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5452 
5453   Not Collective
5454 
5455   Input Parameters:
5456 . A - The matrix in mpiaij format
5457 
5458   Output Parameter:
5459 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5460 . colmap - A map from global column index to local index into lvec
5461 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5462 
5463   Level: developer
5464 
5465 @*/
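/* Example usage (a sketch; the returned objects are owned by the matrix and must not be destroyed by the caller):

     Vec        lvec;
     VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/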
5466 #if defined(PETSC_USE_CTABLE)
5467 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5468 #else
5469 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5470 #endif
5471 {
5472   Mat_MPIAIJ *a;
5473 
5474   PetscFunctionBegin;
5475   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5476   PetscValidPointer(lvec, 2);
5477   PetscValidPointer(colmap, 3);
5478   PetscValidPointer(multScatter, 4);
5479   a = (Mat_MPIAIJ*) A->data;
5480   if (lvec) *lvec = a->lvec;
5481   if (colmap) *colmap = a->colmap;
5482   if (multScatter) *multScatter = a->Mvctx;
5483   PetscFunctionReturn(0);
5484 }
5485 
5486 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5487 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5488 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5489 
5490 #undef __FUNCT__
5491 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5492 /*
5493     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
5494 
5495                n                       p                          p
5496         (              )       (              )         (                  )
5497       m (      A       )  *  n (       B      )   =   m (         C        )
5498         (              )       (              )         (                  )
5499 
5500 */
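/* That is, C = A*B is obtained from the identity C = ((A*B)^T)^T = (B^T * A^T)^T;
   the transposed product B^T * A^T is an MPIAIJ times MPIDense multiply, for which
   a native kernel is available. */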
5501 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5502 {
5503   PetscErrorCode ierr;
5504   Mat            At,Bt,Ct;
5505 
5506   PetscFunctionBegin;
5507   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5508   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5509   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5510   ierr = MatDestroy(&At);CHKERRQ(ierr);
5511   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5512   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5513   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5514   PetscFunctionReturn(0);
5515 }
5516 
5517 #undef __FUNCT__
5518 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5519 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5520 {
5521   PetscErrorCode ierr;
5522   PetscInt       m=A->rmap->n,n=B->cmap->n;
5523   Mat            Cmat;
5524 
5525   PetscFunctionBegin;
5526   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5527   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5528   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5529   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5530   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5531   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5532   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5533   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5534 
5535   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5536 
5537   *C = Cmat;
5538   PetscFunctionReturn(0);
5539 }
5540 
5541 /* ----------------------------------------------------------------*/
5542 #undef __FUNCT__
5543 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5544 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5545 {
5546   PetscErrorCode ierr;
5547 
5548   PetscFunctionBegin;
5549   if (scall == MAT_INITIAL_MATRIX) {
5550     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5551     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5552     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5553   }
5554   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5555   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5556   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5557   PetscFunctionReturn(0);
5558 }
5559 
5560 #if defined(PETSC_HAVE_MUMPS)
5561 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5562 #endif
5563 #if defined(PETSC_HAVE_PASTIX)
5564 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5565 #endif
5566 #if defined(PETSC_HAVE_SUPERLU_DIST)
5567 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5568 #endif
5569 #if defined(PETSC_HAVE_CLIQUE)
5570 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5571 #endif
5572 
5573 /*MC
5574    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5575 
5576    Options Database Keys:
5577 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5578 
5579   Level: beginner
5580 
5581 .seealso: MatCreateAIJ()
5582 M*/
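/* Example (a minimal sketch): creating an "mpiaij" matrix explicitly; the same type is
   selected at runtime with -mat_type mpiaij when MatSetFromOptions() is used.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);   illustrative nonzero counts
*/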
5583 
5584 #undef __FUNCT__
5585 #define __FUNCT__ "MatCreate_MPIAIJ"
5586 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5587 {
5588   Mat_MPIAIJ     *b;
5589   PetscErrorCode ierr;
5590   PetscMPIInt    size;
5591 
5592   PetscFunctionBegin;
5593   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5594 
5595   ierr          = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5596   B->data       = (void*)b;
5597   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5598   B->assembled  = PETSC_FALSE;
5599   B->insertmode = NOT_SET_VALUES;
5600   b->size       = size;
5601 
5602   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5603 
5604   /* build cache for off array entries formed */
5605   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5606 
5607   b->donotstash  = PETSC_FALSE;
5608   b->colmap      = 0;
5609   b->garray      = 0;
5610   b->roworiented = PETSC_TRUE;
5611 
5612   /* stuff used for matrix vector multiply */
5613   b->lvec  = NULL;
5614   b->Mvctx = NULL;
5615 
5616   /* stuff for MatGetRow() */
5617   b->rowindices   = 0;
5618   b->rowvalues    = 0;
5619   b->getrowactive = PETSC_FALSE;
5620 
5621   /* flexible pointer used in CUSP/CUSPARSE classes */
5622   b->spptr = NULL;
5623 
5624 #if defined(PETSC_HAVE_MUMPS)
5625   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5626 #endif
5627 #if defined(PETSC_HAVE_PASTIX)
5628   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5629 #endif
5630 #if defined(PETSC_HAVE_SUPERLU_DIST)
5631   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5632 #endif
5633 #if defined(PETSC_HAVE_CLIQUE)
5634   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5635 #endif
5636   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5637   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5638   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5639   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5640   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5641   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5642   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5643   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5644   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5645   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5646   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5647   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5648   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5649   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5650   PetscFunctionReturn(0);
5651 }
5652 
5653 #undef __FUNCT__
5654 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5655 /*@
5656      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5657          and "off-diagonal" part of the matrix in CSR format.
5658 
5659    Collective on MPI_Comm
5660 
5661    Input Parameters:
5662 +  comm - MPI communicator
5663 .  m - number of local rows (Cannot be PETSC_DECIDE)
5664 .  n - number of local columns. This value should be the same as the local size used in creating the
5665        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5666        calculated if N is given). For square matrices n is almost always m.
5667 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5668 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5669 .   i - row indices for "diagonal" portion of matrix
5670 .   j - column indices
5671 .   a - matrix values
5672 .   oi - row indices for "off-diagonal" portion of matrix
5673 .   oj - column indices
5674 -   oa - matrix values
5675 
5676    Output Parameter:
5677 .   mat - the matrix
5678 
5679    Level: advanced
5680 
5681    Notes:
5682        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5683        must free the arrays once the matrix has been destroyed and not before.
5684 
5685        The i and j indices are 0 based
5686 
5687        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5688 
5689        This sets local rows and cannot be used to set off-processor values.
5690 
5691        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5692        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5693        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5694        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5695        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5696        keep track of the underlying arrays. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5697 
5698 .keywords: matrix, aij, compressed row, sparse, parallel
5699 
5700 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5701           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5702 @*/
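/* Example (an illustrative sketch for rank 0 of a two-process run with one row and one
   column per process; the arrays must remain valid until the matrix is destroyed):

     PetscInt    i[]  = {0,1}, j[]  = {0};     diagonal block: one entry in local column 0
     PetscScalar a[]  = {2.0};
     PetscInt    oi[] = {0,1}, oj[] = {1};     off-diagonal block: one entry in global column 1
     PetscScalar oa[] = {-1.0};
     Mat         A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/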
5703 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5704 {
5705   PetscErrorCode ierr;
5706   Mat_MPIAIJ     *maij;
5707 
5708   PetscFunctionBegin;
5709   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5710   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5711   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5712   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5713   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5714   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5715   maij = (Mat_MPIAIJ*) (*mat)->data;
5716 
5717   (*mat)->preallocated = PETSC_TRUE;
5718 
5719   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5720   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5721 
5722   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5723   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5724 
5725   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5726   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5727   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5728   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5729 
5730   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5731   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5732   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5733   PetscFunctionReturn(0);
5734 }
5735 
5736 /*
5737     Special version for direct calls from Fortran
5738 */
5739 #include <petsc-private/fortranimpl.h>
5740 
5741 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5742 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5743 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5744 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5745 #endif
5746 
5747 /* Change these macros so can be used in void function */
5748 #undef CHKERRQ
5749 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5750 #undef SETERRQ2
5751 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5752 #undef SETERRQ3
5753 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5754 #undef SETERRQ
5755 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5756 
5757 #undef __FUNCT__
5758 #define __FUNCT__ "matsetvaluesmpiaij_"
5759 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5760 {
5761   Mat            mat  = *mmat;
5762   PetscInt       m    = *mm, n = *mn;
5763   InsertMode     addv = *maddv;
5764   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5765   PetscScalar    value;
5766   PetscErrorCode ierr;
5767 
5768   MatCheckPreallocated(mat,1);
5769   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5770 
5771 #if defined(PETSC_USE_DEBUG)
5772   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5773 #endif
5774   {
5775     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5776     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5777     PetscBool roworiented = aij->roworiented;
5778 
5779     /* Some Variables required in the macro */
5780     Mat        A                 = aij->A;
5781     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5782     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5783     MatScalar  *aa               = a->a;
5784     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5785     Mat        B                 = aij->B;
5786     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5787     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5788     MatScalar  *ba               = b->a;
5789 
5790     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5791     PetscInt  nonew = a->nonew;
5792     MatScalar *ap1,*ap2;
5793 
5794     PetscFunctionBegin;
5795     for (i=0; i<m; i++) {
5796       if (im[i] < 0) continue;
5797 #if defined(PETSC_USE_DEBUG)
5798       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5799 #endif
5800       if (im[i] >= rstart && im[i] < rend) {
5801         row      = im[i] - rstart;
5802         lastcol1 = -1;
5803         rp1      = aj + ai[row];
5804         ap1      = aa + ai[row];
5805         rmax1    = aimax[row];
5806         nrow1    = ailen[row];
5807         low1     = 0;
5808         high1    = nrow1;
5809         lastcol2 = -1;
5810         rp2      = bj + bi[row];
5811         ap2      = ba + bi[row];
5812         rmax2    = bimax[row];
5813         nrow2    = bilen[row];
5814         low2     = 0;
5815         high2    = nrow2;
5816 
5817         for (j=0; j<n; j++) {
5818           if (roworiented) value = v[i*n+j];
5819           else value = v[i+j*m];
5820           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5821           if (in[j] >= cstart && in[j] < cend) {
5822             col = in[j] - cstart;
5823             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5824           } else if (in[j] < 0) continue;
5825 #if defined(PETSC_USE_DEBUG)
5826           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5827 #endif
5828           else {
5829             if (mat->was_assembled) {
5830               if (!aij->colmap) {
5831                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5832               }
5833 #if defined(PETSC_USE_CTABLE)
5834               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5835               col--;
5836 #else
5837               col = aij->colmap[in[j]] - 1;
5838 #endif
5839               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5840                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5841                 col  =  in[j];
5842                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5843                 B     = aij->B;
5844                 b     = (Mat_SeqAIJ*)B->data;
5845                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5846                 rp2   = bj + bi[row];
5847                 ap2   = ba + bi[row];
5848                 rmax2 = bimax[row];
5849                 nrow2 = bilen[row];
5850                 low2  = 0;
5851                 high2 = nrow2;
5852                 bm    = aij->B->rmap->n;
5853                 ba    = b->a;
5854               }
5855             } else col = in[j];
5856             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5857           }
5858         }
5859       } else if (!aij->donotstash) {
5860         if (roworiented) {
5861           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5862         } else {
5863           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5864         }
5865       }
5866     }
5867   }
5868   PetscFunctionReturnVoid();
5869 }
5870 
5871