xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 5c8f6a953e7ed1c81f507d64aebddb11080b60e9)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
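
  Example usage (a minimal sketch; error checking is omitted and comm, M, N, d_nz, and o_nz stand for user-supplied values):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,d_nz,PETSC_NULL);                   /* used when the communicator has a single process */
   MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);   /* used when it has more than one */
.ve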
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The AIJ type also automatically switches over to use inodes when
18    enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
23 M*/
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
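
  Example usage (a sketch; A is an already created matrix):
.vb
   MatSetType(A,MATAIJCRL);
.ve
  or select the type at run time with -mat_type aijcrl and MatSetFromOptions().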
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
41 
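/*
   MatFindNonzeroRows_MPIAIJ - creates an index set of the locally owned rows that contain at least
   one stored nonzero value (in either the diagonal A block or the off-diagonal B block). If no
   process owns a completely zero row, *keptrows is left as 0 (NULL).
*/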
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for (j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
107 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
108 {
109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
110   PetscErrorCode ierr;
111   PetscInt       i,rstart,nrows,*rows;
112 
113   PetscFunctionBegin;
114   *zrows = PETSC_NULL;
115   ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
116   ierr = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
117   for (i=0; i<nrows; i++) rows[i] += rstart;
118   ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
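/*
   MatGetColumnNorms_MPIAIJ - each process accumulates the contributions of its diagonal (A) and
   off-diagonal (B) blocks into a work array of global length n; a single MPI_Allreduce() (MPIU_SUM
   for the 1- and 2-norms, MPIU_MAX for the infinity norm) then gives every process all the column norms.
*/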
122 #undef __FUNCT__
123 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
124 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
125 {
126   PetscErrorCode ierr;
127   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
128   PetscInt       i,n,*garray = aij->garray;
129   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
130   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
131   PetscReal      *work;
132 
133   PetscFunctionBegin;
134   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
135   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
136   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
137   if (type == NORM_2) {
138     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
139       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
140     }
141     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
142       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
143     }
144   } else if (type == NORM_1) {
145     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
146       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
147     }
148     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
149       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
150     }
151   } else if (type == NORM_INFINITY) {
152     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
153       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
154     }
155     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
156       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
157     }
158 
159   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
160   if (type == NORM_INFINITY) {
161     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
162   } else {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
164   }
165   ierr = PetscFree(work);CHKERRQ(ierr);
166   if (type == NORM_2) {
167     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "MatDistribute_MPIAIJ"
174 /*
175     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
176     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
177 
178     Only for square matrices
179 */
180 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
181 {
182   PetscMPIInt    rank,size;
183   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
184   PetscErrorCode ierr;
185   Mat            mat;
186   Mat_SeqAIJ     *gmata;
187   PetscMPIInt    tag;
188   MPI_Status     status;
189   PetscBool      aij;
190   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
191 
192   PetscFunctionBegin;
193   CHKMEMQ;
194   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
195   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
196   if (!rank) {
197     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
198     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
199   }
200   if (reuse == MAT_INITIAL_MATRIX) {
201     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
202     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
203     if (!rank) {
204       bses[0] = gmat->rmap->bs;
205       bses[1] = gmat->cmap->bs;
206     }
207     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
208     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
209     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
210     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
211     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
212     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) {
215       rowners[i] += rowners[i-1];
216     }
217     rstart = rowners[rank];
218     rend   = rowners[rank+1];
219     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
220     if (!rank) {
221       gmata = (Mat_SeqAIJ*) gmat->data;
222       /* send row lengths to all processors */
223       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
224       for (i=1; i<size; i++) {
225         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
226       }
227       /* determine the number of diagonal and off-diagonal entries in each row */
228       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
229       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
230       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
231       jj = 0;
232       for (i=0; i<m; i++) {
233         for (j=0; j<dlens[i]; j++) {
234           if (gmata->j[jj] < rstart) ld[i]++;
235           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
236           jj++;
237         }
238       }
239       /* send column indices to other processes */
240       for (i=1; i<size; i++) {
241         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
242         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
243         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
244       }
245 
246       /* send numerical values to other processes */
247       for (i=1; i<size; i++) {
248         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
249         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
250       }
251       gmataa = gmata->a;
252       gmataj = gmata->j;
253 
254     } else {
255       /* receive row lengths */
256       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
257       /* receive column indices */
258       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
260       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       /* determine the number of diagonal and off-diagonal entries in each row */
262       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
263       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
264       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
265       jj = 0;
266       for (i=0; i<m; i++) {
267         for (j=0; j<dlens[i]; j++) {
268           if (gmataj[jj] < rstart) ld[i]++;
269           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
270           jj++;
271         }
272       }
273       /* receive numerical values */
274       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
275       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
276     }
277     /* set preallocation */
278     for (i=0; i<m; i++) {
279       dlens[i] -= olens[i];
280     }
281     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
282     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
283 
284     for (i=0; i<m; i++) {
285       dlens[i] += olens[i];
286     }
287     cnt  = 0;
288     for (i=0; i<m; i++) {
289       row  = rstart + i;
290       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
291       cnt += dlens[i];
292     }
293     if (rank) {
294       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
295     }
296     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
297     ierr = PetscFree(rowners);CHKERRQ(ierr);
298     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
299     *inmat = mat;
300   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
301     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
302     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
303     mat   = *inmat;
304     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
305     if (!rank) {
306       /* send numerical values to other processes */
307       gmata = (Mat_SeqAIJ*) gmat->data;
308       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
309       gmataa = gmata->a;
310       for (i=1; i<size; i++) {
311         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
312         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
313       }
314       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
315     } else {
316       /* receive numerical values from process 0 */
317       nz   = Ad->nz + Ao->nz;
318       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
319       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
320     }
321     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
322     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
323     ad = Ad->a;
324     ao = Ao->a;
325     if (mat->rmap->n) {
326       i  = 0;
327       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
328       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
329     }
330     for (i=1; i<mat->rmap->n; i++) {
331       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     i--;
335     if (mat->rmap->n) {
336       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
337     }
338     if (rank) {
339       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
340     }
341   }
342   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
344   CHKMEMQ;
345   PetscFunctionReturn(0);
346 }
347 
348 /*
349   Local utility routine that creates a mapping from the global column
350 number to the local number in the off-diagonal part of the local
351 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
352 a slightly higher hash table cost; without it, it is not scalable (each processor
353 has an order N integer array) but is fast to access.
354 */
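/*
   For illustration only (a sketch of how the colmap built below is used later in this file, for
   example in MatSetValues_MPIAIJ()): a global column index gcol is translated to its local index
   lcol in the off-diagonal block B by

#if defined (PETSC_USE_CTABLE)
     PetscTableFind(aij->colmap,gcol+1,&lcol); lcol--;
#else
     lcol = aij->colmap[gcol] - 1;
#endif

   where lcol < 0 means that gcol does not (yet) appear in B.
*/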
355 #undef __FUNCT__
356 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
357 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
358 {
359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
360   PetscErrorCode ierr;
361   PetscInt       n = aij->B->cmap->n,i;
362 
363   PetscFunctionBegin;
364   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
365 #if defined (PETSC_USE_CTABLE)
366   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
367   for (i=0; i<n; i++) {
368     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
369   }
370 #else
371   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
372   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
373   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
374   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
375 #endif
376   PetscFunctionReturn(0);
377 }
378 
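/*
   The two macros below insert (or, with ADD_VALUES, add to) a single entry of the diagonal (A) or
   off-diagonal (B) SeqAIJ block: a short binary search narrows down the position of col within the
   current row, a linear scan finishes the search, and if the entry is new the row is grown with
   MatSeqXAIJReallocateAIJ() and the later entries in the row are shifted up one slot.
*/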
379 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
380 { \
381     if (col <= lastcol1)  low1 = 0;     \
382     else                 high1 = nrow1; \
383     lastcol1 = col;\
384     while (high1-low1 > 5) { \
385       t = (low1+high1)/2; \
386       if (rp1[t] > col) high1 = t; \
387       else              low1  = t; \
388     } \
389       for (_i=low1; _i<high1; _i++) { \
390         if (rp1[_i] > col) break; \
391         if (rp1[_i] == col) { \
392           if (addv == ADD_VALUES) ap1[_i] += value;   \
393           else                    ap1[_i] = value; \
394           goto a_noinsert; \
395         } \
396       }  \
397       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
398       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
399       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
400       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
401       N = nrow1++ - 1; a->nz++; high1++; \
402       /* shift up all the later entries in this row */ \
403       for (ii=N; ii>=_i; ii--) { \
404         rp1[ii+1] = rp1[ii]; \
405         ap1[ii+1] = ap1[ii]; \
406       } \
407       rp1[_i] = col;  \
408       ap1[_i] = value;  \
409       a_noinsert: ; \
410       ailen[row] = nrow1; \
411 }
412 
413 
414 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
415 { \
416     if (col <= lastcol2) low2 = 0;                        \
417     else high2 = nrow2;                                   \
418     lastcol2 = col;                                       \
419     while (high2-low2 > 5) {                              \
420       t = (low2+high2)/2;                                 \
421       if (rp2[t] > col) high2 = t;                        \
422       else             low2  = t;                         \
423     }                                                     \
424     for (_i=low2; _i<high2; _i++) {                       \
425       if (rp2[_i] > col) break;                           \
426       if (rp2[_i] == col) {                               \
427         if (addv == ADD_VALUES) ap2[_i] += value;         \
428         else                    ap2[_i] = value;          \
429         goto b_noinsert;                                  \
430       }                                                   \
431     }                                                     \
432     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
433     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
434     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
435     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
436     N = nrow2++ - 1; b->nz++; high2++;                    \
437     /* shift up all the later entries in this row */      \
438     for (ii=N; ii>=_i; ii--) {                            \
439       rp2[ii+1] = rp2[ii];                                \
440       ap2[ii+1] = ap2[ii];                                \
441     }                                                     \
442     rp2[_i] = col;                                        \
443     ap2[_i] = value;                                      \
444     b_noinsert: ;                                         \
445     bilen[row] = nrow2;                                   \
446 }
447 
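/*
   MatSetValuesRow_MPIAIJ - copies a full locally owned row supplied in global column order: the
   first l values land in the part of the off-diagonal block B to the left of the diagonal block,
   the next a->i[row+1]-a->i[row] values in the diagonal block A, and the remainder in the rest of B.
*/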
448 #undef __FUNCT__
449 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
450 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
451 {
452   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
453   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
454   PetscErrorCode ierr;
455   PetscInt       l,*garray = mat->garray,diag;
456 
457   PetscFunctionBegin;
458   /* code only works for square matrices A */
459 
460   /* find size of row to the left of the diagonal part */
461   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
462   row  = row - diag;
463   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
464     if (garray[b->j[b->i[row]+l]] > diag) break;
465   }
466   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
467 
468   /* diagonal part */
469   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
470 
471   /* right of diagonal part */
472   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
473   PetscFunctionReturn(0);
474 }
475 
476 #undef __FUNCT__
477 #define __FUNCT__ "MatSetValues_MPIAIJ"
478 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
479 {
480   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
481   PetscScalar    value;
482   PetscErrorCode ierr;
483   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
484   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
485   PetscBool      roworiented = aij->roworiented;
486 
487   /* Some Variables required in the macro */
488   Mat            A = aij->A;
489   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
490   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
491   MatScalar      *aa = a->a;
492   PetscBool      ignorezeroentries = a->ignorezeroentries;
493   Mat            B = aij->B;
494   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
495   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
496   MatScalar      *ba = b->a;
497 
498   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
499   PetscInt       nonew;
500   MatScalar      *ap1,*ap2;
501 
502   PetscFunctionBegin;
503   if (v) PetscValidScalarPointer(v,6);
504   for (i=0; i<m; i++) {
505     if (im[i] < 0) continue;
506 #if defined(PETSC_USE_DEBUG)
507     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
508 #endif
509     if (im[i] >= rstart && im[i] < rend) {
510       row      = im[i] - rstart;
511       lastcol1 = -1;
512       rp1      = aj + ai[row];
513       ap1      = aa + ai[row];
514       rmax1    = aimax[row];
515       nrow1    = ailen[row];
516       low1     = 0;
517       high1    = nrow1;
518       lastcol2 = -1;
519       rp2      = bj + bi[row];
520       ap2      = ba + bi[row];
521       rmax2    = bimax[row];
522       nrow2    = bilen[row];
523       low2     = 0;
524       high2    = nrow2;
525 
526       for (j=0; j<n; j++) {
527         if (v) {
528           if (roworiented) value = v[i*n+j];
529           else             value = v[i+j*m];
530         } else value = 0.0;
531         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
532         if (in[j] >= cstart && in[j] < cend) {
533           col = in[j] - cstart;
534           nonew = a->nonew;
535           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
536         } else if (in[j] < 0) continue;
537 #if defined(PETSC_USE_DEBUG)
538         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
539 #endif
540         else {
541           if (mat->was_assembled) {
542             if (!aij->colmap) {
543               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
544             }
545 #if defined (PETSC_USE_CTABLE)
546             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
547             col--;
548 #else
549             col = aij->colmap[in[j]] - 1;
550 #endif
551             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
552               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
553               col =  in[j];
554               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
555               B = aij->B;
556               b = (Mat_SeqAIJ*)B->data;
557               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
558               rp2      = bj + bi[row];
559               ap2      = ba + bi[row];
560               rmax2    = bimax[row];
561               nrow2    = bilen[row];
562               low2     = 0;
563               high2    = nrow2;
564               bm       = aij->B->rmap->n;
565               ba = b->a;
566             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
567           } else col = in[j];
568           nonew = b->nonew;
569           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
570         }
571       }
572     } else {
573       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
574       if (!aij->donotstash) {
575         mat->assembled = PETSC_FALSE;
576         if (roworiented) {
577           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
578         } else {
579           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
580         }
581       }
582     }
583   }
584   PetscFunctionReturn(0);
585 }
586 
587 #undef __FUNCT__
588 #define __FUNCT__ "MatGetValues_MPIAIJ"
589 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
590 {
591   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
592   PetscErrorCode ierr;
593   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
594   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
595 
596   PetscFunctionBegin;
597   for (i=0; i<m; i++) {
598     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
599     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
600     if (idxm[i] >= rstart && idxm[i] < rend) {
601       row = idxm[i] - rstart;
602       for (j=0; j<n; j++) {
603         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
604         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
605         if (idxn[j] >= cstart && idxn[j] < cend) {
606           col = idxn[j] - cstart;
607           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
608         } else {
609           if (!aij->colmap) {
610             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
611           }
612 #if defined (PETSC_USE_CTABLE)
613           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
614           col --;
615 #else
616           col = aij->colmap[idxn[j]] - 1;
617 #endif
618           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
619           else {
620             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
621           }
622         }
623       }
624     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
625   }
626   PetscFunctionReturn(0);
627 }
628 
629 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
630 
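/*
   MatAssemblyBegin_MPIAIJ - verifies that all processes use a consistent InsertMode and starts
   communicating the stashed values that were set for rows owned by other processes;
   MatAssemblyEnd_MPIAIJ() receives them and applies them with MatSetValues_MPIAIJ().
*/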
631 #undef __FUNCT__
632 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
633 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
634 {
635   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
636   PetscErrorCode ierr;
637   PetscInt       nstash,reallocs;
638   InsertMode     addv;
639 
640   PetscFunctionBegin;
641   if (aij->donotstash || mat->nooffprocentries) {
642     PetscFunctionReturn(0);
643   }
644 
645   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
646   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
647   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
648   mat->insertmode = addv; /* in case this processor had no cache */
649 
650   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
651   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
652   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
653   PetscFunctionReturn(0);
654 }
655 
656 #undef __FUNCT__
657 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
658 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
659 {
660   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
661   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
662   PetscErrorCode ierr;
663   PetscMPIInt    n;
664   PetscInt       i,j,rstart,ncols,flg;
665   PetscInt       *row,*col;
666   PetscBool      other_disassembled;
667   PetscScalar    *val;
668   InsertMode     addv = mat->insertmode;
669 
670   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
671 
672   PetscFunctionBegin;
673   if (!aij->donotstash && !mat->nooffprocentries) {
674     while (1) {
675       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
676       if (!flg) break;
677 
678       for (i=0; i<n;) {
679         /* Now identify the consecutive vals belonging to the same row */
680         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
681         if (j < n) ncols = j-i;
682         else       ncols = n-i;
683         /* Now assemble all these values with a single function call */
684         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
685         i = j;
686       }
687     }
688     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
689   }
690   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
691   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
692 
693   /* determine if any processor has disassembled; if so, we must
694      also disassemble ourselves, in order that we may reassemble. */
695   /*
696      if nonzero structure of submatrix B cannot change then we know that
697      no processor disassembled thus we can skip this stuff
698   */
699   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
700     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
701     if (mat->was_assembled && !other_disassembled) {
702       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
703     }
704   }
705   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
706     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
707   }
708   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
709   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
710   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
711   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
712 
713   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
714   aij->rowvalues = 0;
715 
716   /* used by MatAXPY() */
717   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
718   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
719 
720   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
721   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
722   PetscFunctionReturn(0);
723 }
724 
725 #undef __FUNCT__
726 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
727 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
728 {
729   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
730   PetscErrorCode ierr;
731 
732   PetscFunctionBegin;
733   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
734   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
735   PetscFunctionReturn(0);
736 }
737 
738 #undef __FUNCT__
739 #define __FUNCT__ "MatZeroRows_MPIAIJ"
740 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
741 {
742   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
743   PetscErrorCode    ierr;
744   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
745   PetscInt          i,*owners = A->rmap->range;
746   PetscInt          *nprocs,j,idx,nsends,row;
747   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
748   PetscInt          *rvalues,count,base,slen,*source;
749   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
750   MPI_Comm          comm = ((PetscObject)A)->comm;
751   MPI_Request       *send_waits,*recv_waits;
752   MPI_Status        recv_status,*send_status;
753   const PetscScalar *xx;
754   PetscScalar       *bb;
755 #if defined(PETSC_DEBUG)
756   PetscBool      found = PETSC_FALSE;
757 #endif
758 
759   PetscFunctionBegin;
760   /*  first count number of contributors to each processor */
761   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
762   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
763   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
764   j = 0;
765   for (i=0; i<N; i++) {
766     if (lastidx > (idx = rows[i])) j = 0;
767     lastidx = idx;
768     for (; j<size; j++) {
769       if (idx >= owners[j] && idx < owners[j+1]) {
770         nprocs[2*j]++;
771         nprocs[2*j+1] = 1;
772         owner[i] = j;
773 #if defined(PETSC_DEBUG)
774         found = PETSC_TRUE;
775 #endif
776         break;
777       }
778     }
779 #if defined(PETSC_DEBUG)
780     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
781     found = PETSC_FALSE;
782 #endif
783   }
784   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
785 
786   if (A->nooffproczerorows) {
787     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
788     nrecvs = nsends;
789     nmax   = N;
790   } else {
791     /* inform other processors of the number of messages and the maximum length */
792     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
793   }
794 
795   /* post receives:   */
796   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
797   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
798   for (i=0; i<nrecvs; i++) {
799     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
800   }
801 
802   /* do sends:
803       1) starts[i] gives the starting index in svalues for stuff going to
804          the ith processor
805   */
806   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
807   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
808   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
809   starts[0] = 0;
810   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
811   for (i=0; i<N; i++) {
812     svalues[starts[owner[i]]++] = rows[i];
813   }
814 
815   starts[0] = 0;
816   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
817   count = 0;
818   for (i=0; i<size; i++) {
819     if (nprocs[2*i+1]) {
820       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
821     }
822   }
823   ierr = PetscFree(starts);CHKERRQ(ierr);
824 
825   base = owners[rank];
826 
827   /*  wait on receives */
828   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
829   count  = nrecvs; slen = 0;
830   while (count) {
831     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
832     /* unpack receives into our local space */
833     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
834     source[imdex]  = recv_status.MPI_SOURCE;
835     lens[imdex]    = n;
836     slen          += n;
837     count--;
838   }
839   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
840 
841   /* move the data into the send scatter */
842   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
843   count = 0;
844   for (i=0; i<nrecvs; i++) {
845     values = rvalues + i*nmax;
846     for (j=0; j<lens[i]; j++) {
847       lrows[count++] = values[j] - base;
848     }
849   }
850   ierr = PetscFree(rvalues);CHKERRQ(ierr);
851   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
852   ierr = PetscFree(owner);CHKERRQ(ierr);
853   ierr = PetscFree(nprocs);CHKERRQ(ierr);
854 
855   /* fix right hand side if needed */
856   if (x && b) {
857     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
858     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
859     for (i=0; i<slen; i++) {
860       bb[lrows[i]] = diag*xx[lrows[i]];
861     }
862     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
863     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
864   }
865   /*
866         Zero the required rows. If the "diagonal block" of the matrix
867      is square and the user wishes to set the diagonal, we use separate
868      code so that MatSetValues() is not called for each diagonal entry, which would allocate
869      new memory and perform many mallocs, slowing things down.
870 
871   */
872   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
873   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
874   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
875     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
876   } else if (diag != 0.0) {
877     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
878     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
879       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
880 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
881     }
882     for (i = 0; i < slen; i++) {
883       row  = lrows[i] + rstart;
884       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
885     }
886     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
887     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
888   } else {
889     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
890   }
891   ierr = PetscFree(lrows);CHKERRQ(ierr);
892 
893   /* wait on sends */
894   if (nsends) {
895     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
896     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
897     ierr = PetscFree(send_status);CHKERRQ(ierr);
898   }
899   ierr = PetscFree(send_waits);CHKERRQ(ierr);
900   ierr = PetscFree(svalues);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
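/*
   MatZeroRowsColumns_MPIAIJ - the owners of the listed rows are determined with the same
   communication pattern as MatZeroRows_MPIAIJ() above; the diagonal block is then handled by
   MatZeroRowsColumns() on l->A, while a scattered 0/1 mask vector marks the zeroed columns in the
   off-diagonal block B so that the matching entries can be zeroed and, when a right-hand side b is
   supplied, their products with x subtracted from it.
*/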
904 #undef __FUNCT__
905 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
906 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
907 {
908   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
909   PetscErrorCode    ierr;
910   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
911   PetscInt          i,*owners = A->rmap->range;
912   PetscInt          *nprocs,j,idx,nsends;
913   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
914   PetscInt          *rvalues,count,base,slen,*source;
915   PetscInt          *lens,*lrows,*values,m;
916   MPI_Comm          comm = ((PetscObject)A)->comm;
917   MPI_Request       *send_waits,*recv_waits;
918   MPI_Status        recv_status,*send_status;
919   const PetscScalar *xx;
920   PetscScalar       *bb,*mask;
921   Vec               xmask,lmask;
922   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
923   const PetscInt    *aj, *ii,*ridx;
924   PetscScalar       *aa;
925 #if defined(PETSC_DEBUG)
926   PetscBool         found = PETSC_FALSE;
927 #endif
928 
929   PetscFunctionBegin;
930   /*  first count number of contributors to each processor */
931   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
932   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
933   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
934   j = 0;
935   for (i=0; i<N; i++) {
936     if (lastidx > (idx = rows[i])) j = 0;
937     lastidx = idx;
938     for (; j<size; j++) {
939       if (idx >= owners[j] && idx < owners[j+1]) {
940         nprocs[2*j]++;
941         nprocs[2*j+1] = 1;
942         owner[i] = j;
943 #if defined(PETSC_DEBUG)
944         found = PETSC_TRUE;
945 #endif
946         break;
947       }
948     }
949 #if defined(PETSC_DEBUG)
950     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
951     found = PETSC_FALSE;
952 #endif
953   }
954   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
955 
956   /* inform other processors of the number of messages and the maximum length */
957   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
958 
959   /* post receives:   */
960   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
961   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
962   for (i=0; i<nrecvs; i++) {
963     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
964   }
965 
966   /* do sends:
967       1) starts[i] gives the starting index in svalues for stuff going to
968          the ith processor
969   */
970   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
971   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
972   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
973   starts[0] = 0;
974   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
975   for (i=0; i<N; i++) {
976     svalues[starts[owner[i]]++] = rows[i];
977   }
978 
979   starts[0] = 0;
980   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
981   count = 0;
982   for (i=0; i<size; i++) {
983     if (nprocs[2*i+1]) {
984       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
985     }
986   }
987   ierr = PetscFree(starts);CHKERRQ(ierr);
988 
989   base = owners[rank];
990 
991   /*  wait on receives */
992   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
993   count  = nrecvs; slen = 0;
994   while (count) {
995     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
996     /* unpack receives into our local space */
997     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
998     source[imdex]  = recv_status.MPI_SOURCE;
999     lens[imdex]    = n;
1000     slen          += n;
1001     count--;
1002   }
1003   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1004 
1005   /* move the data into the send scatter */
1006   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1007   count = 0;
1008   for (i=0; i<nrecvs; i++) {
1009     values = rvalues + i*nmax;
1010     for (j=0; j<lens[i]; j++) {
1011       lrows[count++] = values[j] - base;
1012     }
1013   }
1014   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1015   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1016   ierr = PetscFree(owner);CHKERRQ(ierr);
1017   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1018   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1019 
1020   /* zero diagonal part of matrix */
1021   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1022 
1023   /* handle off diagonal part of matrix */
1024   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
1025   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1026   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1027   for (i=0; i<slen; i++) {
1028     bb[lrows[i]] = 1;
1029   }
1030   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1031   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1032   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1034   if (x) {
1035     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1037     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1038     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1039   }
1040   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1041 
1042   /* remove zeroed rows of off diagonal matrix */
1043   ii = aij->i;
1044   for (i=0; i<slen; i++) {
1045     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1046   }
1047 
1048   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
1049   if (aij->compressedrow.use) {
1050     m    = aij->compressedrow.nrows;
1051     ii   = aij->compressedrow.i;
1052     ridx = aij->compressedrow.rindex;
1053     for (i=0; i<m; i++) {
1054       n   = ii[i+1] - ii[i];
1055       aj  = aij->j + ii[i];
1056       aa  = aij->a + ii[i];
1057 
1058       for (j=0; j<n; j++) {
1059         if (PetscAbsScalar(mask[*aj])) {
1060           if (b) bb[*ridx] -= *aa*xx[*aj];
1061           *aa        = 0.0;
1062         }
1063         aa++;
1064         aj++;
1065       }
1066       ridx++;
1067     }
1068   } else { /* do not use compressed row format */
1069     m = l->B->rmap->n;
1070     for (i=0; i<m; i++) {
1071       n   = ii[i+1] - ii[i];
1072       aj  = aij->j + ii[i];
1073       aa  = aij->a + ii[i];
1074       for (j=0; j<n; j++) {
1075         if (PetscAbsScalar(mask[*aj])) {
1076           if (b) bb[i] -= *aa*xx[*aj];
1077           *aa    = 0.0;
1078         }
1079         aa++;
1080         aj++;
1081       }
1082     }
1083   }
1084   if (x) {
1085     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1086     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1087   }
1088   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1089   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1090   ierr = PetscFree(lrows);CHKERRQ(ierr);
1091 
1092   /* wait on sends */
1093   if (nsends) {
1094     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1095     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1096     ierr = PetscFree(send_status);CHKERRQ(ierr);
1097   }
1098   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1099   ierr = PetscFree(svalues);CHKERRQ(ierr);
1100 
1101   PetscFunctionReturn(0);
1102 }
1103 
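/*
   MatMult_MPIAIJ - computes yy = A*xx while overlapping communication with computation: the scatter
   of the needed off-process entries of xx into a->lvec is started, the product with the local
   diagonal block a->A is formed, the scatter is completed, and a->B*a->lvec is added to yy.
*/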
1104 #undef __FUNCT__
1105 #define __FUNCT__ "MatMult_MPIAIJ"
1106 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1107 {
1108   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1109   PetscErrorCode ierr;
1110   PetscInt       nt;
1111 
1112   PetscFunctionBegin;
1113   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1114   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1115   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1116   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1117   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1118   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1124 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1125 {
1126   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1127   PetscErrorCode ierr;
1128 
1129   PetscFunctionBegin;
1130   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1131   PetscFunctionReturn(0);
1132 }
1133 
1134 #undef __FUNCT__
1135 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1136 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1137 {
1138   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1139   PetscErrorCode ierr;
1140 
1141   PetscFunctionBegin;
1142   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1143   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1144   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1145   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 #undef __FUNCT__
1150 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1151 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1152 {
1153   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1154   PetscErrorCode ierr;
1155   PetscBool      merged;
1156 
1157   PetscFunctionBegin;
1158   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1159   /* do nondiagonal part */
1160   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1161   if (!merged) {
1162     /* send it on its way */
1163     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1164     /* do local part */
1165     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1166     /* receive remote parts: note this assumes the values are not actually */
1167     /* added in yy until the next line */
1168     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1169   } else {
1170     /* do local part */
1171     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1172     /* send it on its way */
1173     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1174     /* values actually were received in the Begin() but we need to call this nop */
1175     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1176   }
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 EXTERN_C_BEGIN
1181 #undef __FUNCT__
1182 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1183 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1184 {
1185   MPI_Comm       comm;
1186   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1187   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1188   IS             Me,Notme;
1189   PetscErrorCode ierr;
1190   PetscInt       M,N,first,last,*notme,i;
1191   PetscMPIInt    size;
1192 
1193   PetscFunctionBegin;
1194   /* Easy test: symmetric diagonal block */
1195   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1196   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1197   if (!*f) PetscFunctionReturn(0);
1198   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1199   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1200   if (size == 1) PetscFunctionReturn(0);
1201 
1202   /* Hard test: off-diagonal block. This takes a MatGetSubMatrices() call. */
1203   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1204   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1205   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1206   for (i=0; i<first; i++) notme[i] = i;
1207   for (i=last; i<M; i++) notme[i-last+first] = i;
1208   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1209   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1210   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1211   Aoff = Aoffs[0];
1212   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1213   Boff = Boffs[0];
1214   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1215   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1216   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1217   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1218   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1219   ierr = PetscFree(notme);CHKERRQ(ierr);
1220   PetscFunctionReturn(0);
1221 }
1222 EXTERN_C_END
1223 
1224 #undef __FUNCT__
1225 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1226 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1227 {
1228   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1229   PetscErrorCode ierr;
1230 
1231   PetscFunctionBegin;
1232   /* do nondiagonal part */
1233   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1234   /* send it on its way */
1235   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1236   /* do local part */
1237   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1238   /* receive remote parts */
1239   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1240   PetscFunctionReturn(0);
1241 }
1242 
1243 /*
1244   This only works correctly for square matrices where the subblock A->A is the
1245    diagonal block
1246 */
1247 #undef __FUNCT__
1248 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1249 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1250 {
1251   PetscErrorCode ierr;
1252   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1253 
1254   PetscFunctionBegin;
1255   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1256   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1257   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 #undef __FUNCT__
1262 #define __FUNCT__ "MatScale_MPIAIJ"
1263 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1264 {
1265   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1266   PetscErrorCode ierr;
1267 
1268   PetscFunctionBegin;
1269   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1270   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1271   PetscFunctionReturn(0);
1272 }
1273 
1274 #undef __FUNCT__
1275 #define __FUNCT__ "MatDestroy_MPIAIJ"
1276 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1277 {
1278   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1279   PetscErrorCode ierr;
1280 
1281   PetscFunctionBegin;
1282 #if defined(PETSC_USE_LOG)
1283   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1284 #endif
1285   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1286   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1287   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1288   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1289 #if defined (PETSC_USE_CTABLE)
1290   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1291 #else
1292   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1293 #endif
1294   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1295   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1296   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1297   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1298   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1299   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1300 
1301   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1307   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1308   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1309   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 #undef __FUNCT__
1314 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1315 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1316 {
1317   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1318   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1319   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1320   PetscErrorCode    ierr;
1321   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1322   int               fd;
1323   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1324   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1325   PetscScalar       *column_values;
1326   PetscInt          message_count,flowcontrolcount;
1327   FILE              *file;
1328 
1329   PetscFunctionBegin;
1330   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1331   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1332   nz   = A->nz + B->nz;
1333   if (!rank) {
1334     header[0] = MAT_FILE_CLASSID;
1335     header[1] = mat->rmap->N;
1336     header[2] = mat->cmap->N;
1337     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1338     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1339     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1340     /* get largest number of rows any processor has */
1341     rlen = mat->rmap->n;
1342     range = mat->rmap->range;
1343     for (i=1; i<size; i++) {
1344       rlen = PetscMax(rlen,range[i+1] - range[i]);
1345     }
1346   } else {
1347     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1348     rlen = mat->rmap->n;
1349   }
1350 
1351   /* load up the local row counts */
1352   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1353   for (i=0; i<mat->rmap->n; i++) {
1354     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1355   }
1356 
1357   /* store the row lengths to the file */
1358   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1359   if (!rank) {
1360     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1361     for (i=1; i<size; i++) {
1362       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1363       rlen = range[i+1] - range[i];
1364       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1365       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1366     }
1367     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1368   } else {
1369     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1370     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1371     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1372   }
1373   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1374 
1375   /* load up the local column indices */
1376   nzmax = nz; /* process 0 needs buffer space as large as the largest number of nonzeros on any process */
1377   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1378   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1379   cnt  = 0;
1380   for (i=0; i<mat->rmap->n; i++) {
1381     for (j=B->i[i]; j<B->i[i+1]; j++) {
1382       if ((col = garray[B->j[j]]) > cstart) break;
1383       column_indices[cnt++] = col;
1384     }
1385     for (k=A->i[i]; k<A->i[i+1]; k++) {
1386       column_indices[cnt++] = A->j[k] + cstart;
1387     }
1388     for (; j<B->i[i+1]; j++) {
1389       column_indices[cnt++] = garray[B->j[j]];
1390     }
1391   }
1392   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1393 
1394   /* store the column indices to the file */
1395   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1396   if (!rank) {
1397     MPI_Status status;
1398     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1399     for (i=1; i<size; i++) {
1400       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1401       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1402       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1403       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1404       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1405     }
1406     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1407   } else {
1408     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1409     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1410     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1411     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1412   }
1413   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1414 
1415   /* load up the local column values */
1416   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1417   cnt  = 0;
1418   for (i=0; i<mat->rmap->n; i++) {
1419     for (j=B->i[i]; j<B->i[i+1]; j++) {
1420       if (garray[B->j[j]] > cstart) break;
1421       column_values[cnt++] = B->a[j];
1422     }
1423     for (k=A->i[i]; k<A->i[i+1]; k++) {
1424       column_values[cnt++] = A->a[k];
1425     }
1426     for (; j<B->i[i+1]; j++) {
1427       column_values[cnt++] = B->a[j];
1428     }
1429   }
1430   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1431 
1432   /* store the column values to the file */
1433   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1434   if (!rank) {
1435     MPI_Status status;
1436     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1437     for (i=1; i<size; i++) {
1438       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1439       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1440       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1441       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1442       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1443     }
1444     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1445   } else {
1446     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1447     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1448     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1449     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1450   }
1451   ierr = PetscFree(column_values);CHKERRQ(ierr);
1452 
1453   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1454   if (file) {
1455     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1456   }
1457 
1458   PetscFunctionReturn(0);
1459 }
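
/*
   Usage sketch: the binary dump above is reached through MatView() with a binary
   viewer.  A minimal save cycle (the file name "A.dat" is illustrative) might be

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   and later MatLoad() with a FILE_MODE_READ binary viewer reads the matrix back.
*/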
1460 
1461 #undef __FUNCT__
1462 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1463 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1464 {
1465   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1466   PetscErrorCode    ierr;
1467   PetscMPIInt       rank = aij->rank,size = aij->size;
1468   PetscBool         isdraw,iascii,isbinary;
1469   PetscViewer       sviewer;
1470   PetscViewerFormat format;
1471 
1472   PetscFunctionBegin;
1473   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1474   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1475   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1476   if (iascii) {
1477     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1478     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1479       MatInfo    info;
1480       PetscInt   *inodes;
1481 
1482       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1483       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1484       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,&inodes,PETSC_NULL);CHKERRQ(ierr);
1485       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1486       if (!inodes) {
1487         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1488                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1489       } else {
1490         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1491                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1492       }
1493       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1494       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1495       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1496       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1497       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1498       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1499       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1500       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1501       PetscFunctionReturn(0);
1502     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1503       PetscInt   inodecount,inodelimit,*inodes;
1504       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1505       if (inodes) {
1506         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1507       } else {
1508         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1509       }
1510       PetscFunctionReturn(0);
1511     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1512       PetscFunctionReturn(0);
1513     }
1514   } else if (isbinary) {
1515     if (size == 1) {
1516       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1517       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1518     } else {
1519       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1520     }
1521     PetscFunctionReturn(0);
1522   } else if (isdraw) {
1523     PetscDraw  draw;
1524     PetscBool  isnull;
1525     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1526     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1527   }
1528 
1529   if (size == 1) {
1530     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1531     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1532   } else {
1533     /* assemble the entire matrix onto first processor. */
1534     Mat         A;
1535     Mat_SeqAIJ  *Aloc;
1536     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1537     MatScalar   *a;
1538 
1539     if (mat->rmap->N > 1024) {
1540       PetscBool  flg = PETSC_FALSE;
1541 
1542       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1543       if (!flg) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1544     }
1545 
1546     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1547     if (!rank) {
1548       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1549     } else {
1550       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1551     }
1552     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1553     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1554     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1555     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1556     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1557 
1558     /* copy over the A part */
1559     Aloc = (Mat_SeqAIJ*)aij->A->data;
1560     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1561     row = mat->rmap->rstart;
1562     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1563     for (i=0; i<m; i++) {
1564       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1565       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1566     }
1567     aj = Aloc->j;
1568     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1569 
1570     /* copy over the B part */
1571     Aloc = (Mat_SeqAIJ*)aij->B->data;
1572     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1573     row  = mat->rmap->rstart;
1574     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1575     ct   = cols;
1576     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1577     for (i=0; i<m; i++) {
1578       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1579       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1580     }
1581     ierr = PetscFree(ct);CHKERRQ(ierr);
1582     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1583     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1584     /*
1585        Everyone has to call to draw the matrix since the graphics waits are
1586        synchronized across all processors that share the PetscDraw object
1587     */
1588     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1589     if (!rank) {
1590       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1591       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1592       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1593       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1594     }
1595     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1596     ierr = MatDestroy(&A);CHKERRQ(ierr);
1597   }
1598   PetscFunctionReturn(0);
1599 }
1600 
1601 #undef __FUNCT__
1602 #define __FUNCT__ "MatView_MPIAIJ"
1603 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1604 {
1605   PetscErrorCode ierr;
1606   PetscBool      iascii,isdraw,issocket,isbinary;
1607 
1608   PetscFunctionBegin;
1609   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1610   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1611   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1612   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1613   if (iascii || isdraw || isbinary || issocket) {
1614     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1615   }
1616   PetscFunctionReturn(0);
1617 }
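
/*
   Usage sketch: MatView() dispatches here for ASCII, draw, socket, and binary
   viewers.  To obtain the per-process nonzero report produced by the
   PETSC_VIEWER_ASCII_INFO_DETAIL branch above, one might use

     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/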
1618 
1619 #undef __FUNCT__
1620 #define __FUNCT__ "MatSOR_MPIAIJ"
1621 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1622 {
1623   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1624   PetscErrorCode ierr;
1625   Vec            bb1 = 0;
1626   PetscBool      hasop;
1627 
1628   PetscFunctionBegin;
1629   if (flag == SOR_APPLY_UPPER) {
1630     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1631     PetscFunctionReturn(0);
1632   }
1633 
1634   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1635     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1636   }
1637 
1638   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1639     if (flag & SOR_ZERO_INITIAL_GUESS) {
1640       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1641       its--;
1642     }
1643 
1644     while (its--) {
1645       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1646       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1647 
1648       /* update rhs: bb1 = bb - B*x */
1649       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1650       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1651 
1652       /* local sweep */
1653       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1654     }
1655   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1656     if (flag & SOR_ZERO_INITIAL_GUESS) {
1657       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1658       its--;
1659     }
1660     while (its--) {
1661       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1662       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1663 
1664       /* update rhs: bb1 = bb - B*x */
1665       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1666       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1667 
1668       /* local sweep */
1669       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1670     }
1671   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1672     if (flag & SOR_ZERO_INITIAL_GUESS) {
1673       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1674       its--;
1675     }
1676     while (its--) {
1677       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1678       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1679 
1680       /* update rhs: bb1 = bb - B*x */
1681       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1682       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1683 
1684       /* local sweep */
1685       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1686     }
1687   } else if (flag & SOR_EISENSTAT) {
1688     Vec         xx1;
1689 
1690     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1691     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1692 
1693     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1694     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1695     if (!mat->diag) {
1696       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1697       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1698     }
1699     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1700     if (hasop) {
1701       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1702     } else {
1703       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1704     }
1705     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1706 
1707     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1708 
1709     /* local sweep */
1710     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1711     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1712     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1713   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1714 
1715   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1716   PetscFunctionReturn(0);
1717 }
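
/*
   Usage sketch: MatSOR() dispatches here for MATMPIAIJ; only the local sweeps and the
   Eisenstat variant are supported, a truly parallel SOR is not (see the final error
   above).  A single symmetric local sweep with zero initial guess could be requested,
   for instance, as

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);

   In practice this routine is usually reached through PCSOR, e.g. -pc_type sor.
*/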
1718 
1719 #undef __FUNCT__
1720 #define __FUNCT__ "MatPermute_MPIAIJ"
1721 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1722 {
1723   Mat            aA,aB,Aperm;
1724   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1725   PetscScalar    *aa,*ba;
1726   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1727   PetscSF        rowsf,sf;
1728   IS             parcolp = PETSC_NULL;
1729   PetscBool      done;
1730   PetscErrorCode ierr;
1731 
1732   PetscFunctionBegin;
1733   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1734   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1735   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1736   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1737 
1738   /* Invert row permutation to find out where my rows should go */
1739   ierr = PetscSFCreate(((PetscObject)A)->comm,&rowsf);CHKERRQ(ierr);
1740   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,PETSC_NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1741   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1742   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1743   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1744   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1745 
1746   /* Invert column permutation to find out where my columns should go */
1747   ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
1748   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,PETSC_NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1749   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1750   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1751   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1752   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1753   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1754 
1755   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1756   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1757   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1758 
1759   /* Find out where my gcols should go */
1760   ierr = MatGetSize(aB,PETSC_NULL,&ng);CHKERRQ(ierr);
1761   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1762   ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
1763   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,PETSC_NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1764   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1765   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1766   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1767   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1768 
1769   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1770   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1771   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1772   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1773   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1774   for (i=0; i<m; i++) {
1775     PetscInt row = rdest[i],rowner;
1776     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1777     for (j=ai[i]; j<ai[i+1]; j++) {
1778       PetscInt cowner,col = cdest[aj[j]];
1779       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1780       if (rowner == cowner) dnnz[i]++;
1781       else onnz[i]++;
1782     }
1783     for (j=bi[i]; j<bi[i+1]; j++) {
1784       PetscInt cowner,col = gcdest[bj[j]];
1785       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1786       if (rowner == cowner) dnnz[i]++;
1787       else onnz[i]++;
1788     }
1789   }
1790   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1791   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1792   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1793   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1794   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1795   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1796   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1797 
1798   ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1799   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1800   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1801   for (i=0; i<m; i++) {
1802     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1803     PetscInt rowlen;
1804     rowlen = ai[i+1] - ai[i];
1805     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1806     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1807     rowlen = bi[i+1] - bi[i];
1808     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1809     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1810   }
1811   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1812   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1813   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1814   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1815   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1816   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1817   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1818   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1819   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1820   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1821   *B = Aperm;
1822   PetscFunctionReturn(0);
1823 }
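
/*
   Usage sketch: MatPermute() dispatches here for MATMPIAIJ.  Given index sets rowp
   and colp describing the desired row and column orderings (for instance obtained
   from MatGetOrdering()), a permuted copy is created with

     Mat Ap;
     ierr = MatPermute(A,rowp,colp,&Ap);CHKERRQ(ierr);
*/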
1824 
1825 #undef __FUNCT__
1826 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1827 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1828 {
1829   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1830   Mat            A = mat->A,B = mat->B;
1831   PetscErrorCode ierr;
1832   PetscReal      isend[5],irecv[5];
1833 
1834   PetscFunctionBegin;
1835   info->block_size     = 1.0;
1836   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1837   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1838   isend[3] = info->memory;  isend[4] = info->mallocs;
1839   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1840   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1841   isend[3] += info->memory;  isend[4] += info->mallocs;
1842   if (flag == MAT_LOCAL) {
1843     info->nz_used      = isend[0];
1844     info->nz_allocated = isend[1];
1845     info->nz_unneeded  = isend[2];
1846     info->memory       = isend[3];
1847     info->mallocs      = isend[4];
1848   } else if (flag == MAT_GLOBAL_MAX) {
1849     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1850     info->nz_used      = irecv[0];
1851     info->nz_allocated = irecv[1];
1852     info->nz_unneeded  = irecv[2];
1853     info->memory       = irecv[3];
1854     info->mallocs      = irecv[4];
1855   } else if (flag == MAT_GLOBAL_SUM) {
1856     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1857     info->nz_used      = irecv[0];
1858     info->nz_allocated = irecv[1];
1859     info->nz_unneeded  = irecv[2];
1860     info->memory       = irecv[3];
1861     info->mallocs      = irecv[4];
1862   }
1863   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1864   info->fill_ratio_needed = 0;
1865   info->factor_mallocs    = 0;
1866   PetscFunctionReturn(0);
1867 }
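
/*
   Usage sketch: MatGetInfo() dispatches here for MATMPIAIJ and combines the
   statistics of the two local blocks.  For example, the global number of stored
   nonzeros can be queried with

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g\n",(double)info.nz_used);CHKERRQ(ierr);
*/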
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatSetOption_MPIAIJ"
1871 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1872 {
1873   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1874   PetscErrorCode ierr;
1875 
1876   PetscFunctionBegin;
1877   switch (op) {
1878   case MAT_NEW_NONZERO_LOCATIONS:
1879   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1880   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1881   case MAT_KEEP_NONZERO_PATTERN:
1882   case MAT_NEW_NONZERO_LOCATION_ERR:
1883   case MAT_USE_INODES:
1884   case MAT_IGNORE_ZERO_ENTRIES:
1885     MatCheckPreallocated(A,1);
1886     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1887     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1888     break;
1889   case MAT_ROW_ORIENTED:
1890     a->roworiented = flg;
1891     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1892     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1893     break;
1894   case MAT_NEW_DIAGONALS:
1895     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1896     break;
1897   case MAT_IGNORE_OFF_PROC_ENTRIES:
1898     a->donotstash = flg;
1899     break;
1900   case MAT_SPD:
1901     A->spd_set                         = PETSC_TRUE;
1902     A->spd                             = flg;
1903     if (flg) {
1904       A->symmetric                     = PETSC_TRUE;
1905       A->structurally_symmetric        = PETSC_TRUE;
1906       A->symmetric_set                 = PETSC_TRUE;
1907       A->structurally_symmetric_set    = PETSC_TRUE;
1908     }
1909     break;
1910   case MAT_SYMMETRIC:
1911     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1912     break;
1913   case MAT_STRUCTURALLY_SYMMETRIC:
1914     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1915     break;
1916   case MAT_HERMITIAN:
1917     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1918     break;
1919   case MAT_SYMMETRY_ETERNAL:
1920     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1921     break;
1922   default:
1923     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1924   }
1925   PetscFunctionReturn(0);
1926 }
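
/*
   Usage sketch: MatSetOption() dispatches here for MATMPIAIJ; most options are simply
   forwarded to the two local blocks.  A common choice is to drop entries destined for
   other processes during assembly:

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/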
1927 
1928 #undef __FUNCT__
1929 #define __FUNCT__ "MatGetRow_MPIAIJ"
1930 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1931 {
1932   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1933   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1934   PetscErrorCode ierr;
1935   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1936   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1937   PetscInt       *cmap,*idx_p;
1938 
1939   PetscFunctionBegin;
1940   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1941   mat->getrowactive = PETSC_TRUE;
1942 
1943   if (!mat->rowvalues && (idx || v)) {
1944     /*
1945         allocate enough space to hold information from the longest row.
1946     */
1947     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1948     PetscInt   max = 1,tmp;
1949     for (i=0; i<matin->rmap->n; i++) {
1950       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1951       if (max < tmp) { max = tmp; }
1952     }
1953     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1954   }
1955 
1956   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1957   lrow = row - rstart;
1958 
1959   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1960   if (!v)   {pvA = 0; pvB = 0;}
1961   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1962   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1963   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1964   nztot = nzA + nzB;
1965 
1966   cmap  = mat->garray;
1967   if (v  || idx) {
1968     if (nztot) {
1969       /* Sort by increasing column numbers, assuming A and B already sorted */
1970       PetscInt imark = -1;
1971       if (v) {
1972         *v = v_p = mat->rowvalues;
1973         for (i=0; i<nzB; i++) {
1974           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1975           else break;
1976         }
1977         imark = i;
1978         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1979         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1980       }
1981       if (idx) {
1982         *idx = idx_p = mat->rowindices;
1983         if (imark > -1) {
1984           for (i=0; i<imark; i++) {
1985             idx_p[i] = cmap[cworkB[i]];
1986           }
1987         } else {
1988           for (i=0; i<nzB; i++) {
1989             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1990             else break;
1991           }
1992           imark = i;
1993         }
1994         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1995         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1996       }
1997     } else {
1998       if (idx) *idx = 0;
1999       if (v)   *v   = 0;
2000     }
2001   }
2002   *nz = nztot;
2003   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
2004   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
2005   PetscFunctionReturn(0);
2006 }
2007 
2008 #undef __FUNCT__
2009 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
2010 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2011 {
2012   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2013 
2014   PetscFunctionBegin;
2015   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2016   aij->getrowactive = PETSC_FALSE;
2017   PetscFunctionReturn(0);
2018 }
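
/*
   Usage sketch: MatGetRow()/MatRestoreRow() dispatch to the two routines above; only
   locally owned rows may be requested.  A typical traversal (a sketch, assuming A is
   assembled) is

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/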
2019 
2020 #undef __FUNCT__
2021 #define __FUNCT__ "MatNorm_MPIAIJ"
2022 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2023 {
2024   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2025   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2026   PetscErrorCode ierr;
2027   PetscInt       i,j,cstart = mat->cmap->rstart;
2028   PetscReal      sum = 0.0;
2029   MatScalar      *v;
2030 
2031   PetscFunctionBegin;
2032   if (aij->size == 1) {
2033     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2034   } else {
2035     if (type == NORM_FROBENIUS) {
2036       v = amat->a;
2037       for (i=0; i<amat->nz; i++) {
2038         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2039       }
2040       v = bmat->a;
2041       for (i=0; i<bmat->nz; i++) {
2042         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2043       }
2044       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2045       *norm = PetscSqrtReal(*norm);
2046     } else if (type == NORM_1) { /* max column norm */
2047       PetscReal *tmp,*tmp2;
2048       PetscInt  *jj,*garray = aij->garray;
2049       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2050       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2051       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2052       *norm = 0.0;
2053       v = amat->a; jj = amat->j;
2054       for (j=0; j<amat->nz; j++) {
2055         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
2056       }
2057       v = bmat->a; jj = bmat->j;
2058       for (j=0; j<bmat->nz; j++) {
2059         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2060       }
2061       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2062       for (j=0; j<mat->cmap->N; j++) {
2063         if (tmp2[j] > *norm) *norm = tmp2[j];
2064       }
2065       ierr = PetscFree(tmp);CHKERRQ(ierr);
2066       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2067     } else if (type == NORM_INFINITY) { /* max row norm */
2068       PetscReal ntemp = 0.0;
2069       for (j=0; j<aij->A->rmap->n; j++) {
2070         v = amat->a + amat->i[j];
2071         sum = 0.0;
2072         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2073           sum += PetscAbsScalar(*v); v++;
2074         }
2075         v = bmat->a + bmat->i[j];
2076         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2077           sum += PetscAbsScalar(*v); v++;
2078         }
2079         if (sum > ntemp) ntemp = sum;
2080       }
2081       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2082     } else {
2083       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2084     }
2085   }
2086   PetscFunctionReturn(0);
2087 }
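
/*
   Usage sketch: MatNorm() dispatches here for MATMPIAIJ; the Frobenius, one, and
   infinity norms are supported (the two norm is not).  For example

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/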
2088 
2089 #undef __FUNCT__
2090 #define __FUNCT__ "MatTranspose_MPIAIJ"
2091 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2092 {
2093   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2094   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2095   PetscErrorCode ierr;
2096   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2097   PetscInt       cstart=A->cmap->rstart,ncol;
2098   Mat            B;
2099   MatScalar      *array;
2100 
2101   PetscFunctionBegin;
2102   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"In-place transpose requires a square matrix");
2103 
2104   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2105   ai = Aloc->i; aj = Aloc->j;
2106   bi = Bloc->i; bj = Bloc->j;
2107   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2108     PetscInt *d_nnz,*g_nnz,*o_nnz;
2109     PetscSFNode *oloc;
2110     PETSC_UNUSED PetscSF sf;
2111 
2112     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2113     /* compute d_nnz for preallocation */
2114     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2115     for (i=0; i<ai[ma]; i++) {
2116       d_nnz[aj[i]] ++;
2117       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2118     }
2119     /* compute local off-diagonal contributions */
2120     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2121     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2122     /* map those to global */
2123     ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
2124     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,PETSC_NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2125     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2126     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2127     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2128     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2129     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2130 
2131     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2132     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2133     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2134     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2135     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2136     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2137   } else {
2138     B = *matout;
2139     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2140     for (i=0; i<ai[ma]; i++) {
2141       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2142     }
2143   }
2144 
2145   /* copy over the A part */
2146   array = Aloc->a;
2147   row = A->rmap->rstart;
2148   for (i=0; i<ma; i++) {
2149     ncol = ai[i+1]-ai[i];
2150     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2151     row++; array += ncol; aj += ncol;
2152   }
2153   aj = Aloc->j;
2154   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
2155 
2156   /* copy over the B part */
2157   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2158   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2159   array = Bloc->a;
2160   row = A->rmap->rstart;
2161   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2162   cols_tmp = cols;
2163   for (i=0; i<mb; i++) {
2164     ncol = bi[i+1]-bi[i];
2165     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2166     row++; array += ncol; cols_tmp += ncol;
2167   }
2168   ierr = PetscFree(cols);CHKERRQ(ierr);
2169 
2170   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2171   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2172   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2173     *matout = B;
2174   } else {
2175     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2176   }
2177   PetscFunctionReturn(0);
2178 }
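
/*
   Usage sketch: MatTranspose() dispatches here for MATMPIAIJ.  An explicit transpose
   is created with

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);

   In-place transposition, MatTranspose(A,MAT_REUSE_MATRIX,&A), is only supported for
   square matrices, as checked at the top of the routine.
*/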
2179 
2180 #undef __FUNCT__
2181 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2182 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2183 {
2184   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2185   Mat            a = aij->A,b = aij->B;
2186   PetscErrorCode ierr;
2187   PetscInt       s1,s2,s3;
2188 
2189   PetscFunctionBegin;
2190   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2191   if (rr) {
2192     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2193     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2194     /* Overlap communication with computation. */
2195     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2196   }
2197   if (ll) {
2198     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2199     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2200     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2201   }
2202   /* scale  the diagonal block */
2203   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2204 
2205   if (rr) {
2206     /* Do a scatter end and then right scale the off-diagonal block */
2207     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2208     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2209   }
2210 
2211   PetscFunctionReturn(0);
2212 }
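
/*
   Usage sketch: MatDiagonalScale() dispatches here for MATMPIAIJ and forms
   diag(l) A diag(r), overlapping the scatter of r with the scaling of the diagonal
   block.  For example

     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);

   where l and r are vectors conforming to the row and column layouts of A,
   respectively; either may be PETSC_NULL to skip that side.
*/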
2213 
2214 #undef __FUNCT__
2215 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2216 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2217 {
2218   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2219   PetscErrorCode ierr;
2220 
2221   PetscFunctionBegin;
2222   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2223   PetscFunctionReturn(0);
2224 }
2225 
2226 #undef __FUNCT__
2227 #define __FUNCT__ "MatEqual_MPIAIJ"
2228 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2229 {
2230   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2231   Mat            a,b,c,d;
2232   PetscBool      flg;
2233   PetscErrorCode ierr;
2234 
2235   PetscFunctionBegin;
2236   a = matA->A; b = matA->B;
2237   c = matB->A; d = matB->B;
2238 
2239   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2240   if (flg) {
2241     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2242   }
2243   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2244   PetscFunctionReturn(0);
2245 }
2246 
2247 #undef __FUNCT__
2248 #define __FUNCT__ "MatCopy_MPIAIJ"
2249 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2250 {
2251   PetscErrorCode ierr;
2252   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2253   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2254 
2255   PetscFunctionBegin;
2256   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2257   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2258     /* Because of the column compression in the off-process part of the matrix a->B,
2259        the number of columns in a->B and b->B may differ, so MatCopy() cannot be
2260        called directly on the two parts. If needed, a copy more efficient than
2261        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2262        and then copying the submatrices. */
2263     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2264   } else {
2265     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2266     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2267   }
2268   PetscFunctionReturn(0);
2269 }
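
/*
   Usage sketch: MatCopy() dispatches here for MATMPIAIJ.  When str is
   SAME_NONZERO_PATTERN and both matrices use this copy implementation, the fast
   block-wise path above is taken:

     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

   otherwise the generic MatCopy_Basic() path is used.
*/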
2270 
2271 #undef __FUNCT__
2272 #define __FUNCT__ "MatSetUp_MPIAIJ"
2273 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2274 {
2275   PetscErrorCode ierr;
2276 
2277   PetscFunctionBegin;
2278   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2279   PetscFunctionReturn(0);
2280 }
2281 
2282 #undef __FUNCT__
2283 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2284 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2285 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2286 {
2287   PetscInt          i,m=Y->rmap->N;
2288   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2289   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2290   const PetscInt    *xi = x->i,*yi = y->i;
2291 
2292   PetscFunctionBegin;
2293   /* Set the number of nonzeros in the new matrix */
2294   for (i=0; i<m; i++) {
2295     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2296     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2297     nnz[i] = 0;
2298     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2299       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2300       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2301       nnz[i]++;
2302     }
2303     for (; k<nzy; k++) nnz[i]++;
2304   }
2305   PetscFunctionReturn(0);
2306 }
2307 
2308 #undef __FUNCT__
2309 #define __FUNCT__ "MatAXPY_MPIAIJ"
2310 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2311 {
2312   PetscErrorCode ierr;
2313   PetscInt       i;
2314   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2315   PetscBLASInt   bnz,one=1;
2316   Mat_SeqAIJ     *x,*y;
2317 
2318   PetscFunctionBegin;
2319   if (str == SAME_NONZERO_PATTERN) {
2320     PetscScalar alpha = a;
2321     x = (Mat_SeqAIJ *)xx->A->data;
2322     y = (Mat_SeqAIJ *)yy->A->data;
2323     bnz = PetscBLASIntCast(x->nz);
2324     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2325     x = (Mat_SeqAIJ *)xx->B->data;
2326     y = (Mat_SeqAIJ *)yy->B->data;
2327     bnz = PetscBLASIntCast(x->nz);
2328     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2329   } else if (str == SUBSET_NONZERO_PATTERN) {
2330     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2331 
2332     x = (Mat_SeqAIJ *)xx->B->data;
2333     y = (Mat_SeqAIJ *)yy->B->data;
2334     if (y->xtoy && y->XtoY != xx->B) {
2335       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2336       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2337     }
2338     if (!y->xtoy) { /* get xtoy */
2339       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2340       y->XtoY = xx->B;
2341       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2342     }
2343     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2344   } else {
2345     Mat B;
2346     PetscInt *nnz_d,*nnz_o;
2347     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2348     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2349     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2350     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2351     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2352     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2353     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2354     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2355     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2356     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2357     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2358     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2359     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2360     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2361   }
2362   PetscFunctionReturn(0);
2363 }
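
/*
   Usage sketch: MatAXPY() dispatches here for MATMPIAIJ and computes Y = a*X + Y,
   choosing a fast path when the nonzero patterns allow it.  For example, with
   identical patterns,

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/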
2364 
2365 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2366 
2367 #undef __FUNCT__
2368 #define __FUNCT__ "MatConjugate_MPIAIJ"
2369 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2370 {
2371 #if defined(PETSC_USE_COMPLEX)
2372   PetscErrorCode ierr;
2373   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2374 
2375   PetscFunctionBegin;
2376   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2377   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2378 #else
2379   PetscFunctionBegin;
2380 #endif
2381   PetscFunctionReturn(0);
2382 }
2383 
2384 #undef __FUNCT__
2385 #define __FUNCT__ "MatRealPart_MPIAIJ"
2386 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2387 {
2388   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2389   PetscErrorCode ierr;
2390 
2391   PetscFunctionBegin;
2392   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2393   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2394   PetscFunctionReturn(0);
2395 }
2396 
2397 #undef __FUNCT__
2398 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2399 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2400 {
2401   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2402   PetscErrorCode ierr;
2403 
2404   PetscFunctionBegin;
2405   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2406   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2407   PetscFunctionReturn(0);
2408 }
2409 
2410 #if defined(PETSC_HAVE_PBGL)
2411 
2412 #include <boost/parallel/mpi/bsp_process_group.hpp>
2413 #include <boost/graph/distributed/ilu_default_graph.hpp>
2414 #include <boost/graph/distributed/ilu_0_block.hpp>
2415 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2416 #include <boost/graph/distributed/petsc/interface.hpp>
2417 #include <boost/multi_array.hpp>
2418 #include <boost/parallel/distributed_property_map.hpp>
2419 
2420 #undef __FUNCT__
2421 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2422 /*
2423   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2424 */
2425 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2426 {
2427   namespace petsc = boost::distributed::petsc;
2428 
2429   namespace graph_dist = boost::graph::distributed;
2430   using boost::graph::distributed::ilu_default::process_group_type;
2431   using boost::graph::ilu_permuted;
2432 
2433   PetscBool       row_identity, col_identity;
2434   PetscContainer  c;
2435   PetscInt        m, n, M, N;
2436   PetscErrorCode  ierr;
2437 
2438   PetscFunctionBegin;
2439   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2440   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2441   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2442   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2443 
2444   process_group_type pg;
2445   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2446   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2447   lgraph_type&   level_graph = *lgraph_p;
2448   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2449 
2450   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2451   ilu_permuted(level_graph);
2452 
2453   /* put together the new matrix */
2454   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2455   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2456   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2457   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2458   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2459   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2460   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2461   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2462 
2463   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2464   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2465   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2466   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 #undef __FUNCT__
2471 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2472 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2473 {
2474   PetscFunctionBegin;
2475   PetscFunctionReturn(0);
2476 }
2477 
2478 #undef __FUNCT__
2479 #define __FUNCT__ "MatSolve_MPIAIJ"
2480 /*
2481   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2482 */
2483 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2484 {
2485   namespace graph_dist = boost::graph::distributed;
2486 
2487   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2488   lgraph_type*   lgraph_p;
2489   PetscContainer c;
2490   PetscErrorCode ierr;
2491 
2492   PetscFunctionBegin;
2493   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2494   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2495   ierr = VecCopy(b, x);CHKERRQ(ierr);
2496 
2497   PetscScalar* array_x;
2498   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2499   PetscInt sx;
2500   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2501 
2502   PetscScalar* array_b;
2503   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2504   PetscInt sb;
2505   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2506 
2507   lgraph_type&   level_graph = *lgraph_p;
2508   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2509 
2510   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2511   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2512                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2513 
2514   typedef boost::iterator_property_map<array_ref_type::iterator,
2515                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2516   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2517                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2518 
2519   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2520 
2521   PetscFunctionReturn(0);
2522 }
2523 #endif
2524 
2525 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2526   PetscInt       nzlocal,nsends,nrecvs;
2527   PetscMPIInt    *send_rank,*recv_rank;
2528   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2529   PetscScalar    *sbuf_a,**rbuf_a;
2530   PetscErrorCode (*Destroy)(Mat);
2531 } Mat_Redundant;
2532 
2533 #undef __FUNCT__
2534 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2535 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2536 {
2537   PetscErrorCode       ierr;
2538   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2539   PetscInt             i;
2540 
2541   PetscFunctionBegin;
2542   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2543   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2544   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2545   for (i=0; i<redund->nrecvs; i++) {
2546     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2547     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2548   }
2549   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2550   ierr = PetscFree(redund);CHKERRQ(ierr);
2551   PetscFunctionReturn(0);
2552 }
2553 
2554 #undef __FUNCT__
2555 #define __FUNCT__ "MatDestroy_MatRedundant"
2556 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2557 {
2558   PetscErrorCode  ierr;
2559   PetscContainer  container;
2560   Mat_Redundant   *redund=PETSC_NULL;
2561 
2562   PetscFunctionBegin;
2563   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2564   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2565   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2566   A->ops->destroy = redund->Destroy;
2567   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2568   if (A->ops->destroy) {
2569     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2570   }
2571   PetscFunctionReturn(0);
2572 }
2573 
2574 #undef __FUNCT__
2575 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2576 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2577 {
2578   PetscMPIInt    rank,size;
2579   MPI_Comm       comm=((PetscObject)mat)->comm;
2580   PetscErrorCode ierr;
2581   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2582   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2583   PetscInt       *rowrange=mat->rmap->range;
2584   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2585   Mat            A=aij->A,B=aij->B,C=*matredundant;
2586   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2587   PetscScalar    *sbuf_a;
2588   PetscInt       nzlocal=a->nz+b->nz;
2589   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2590   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2591   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2592   MatScalar      *aworkA,*aworkB;
2593   PetscScalar    *vals;
2594   PetscMPIInt    tag1,tag2,tag3,imdex;
2595   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2596                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2597   MPI_Status     recv_status,*send_status;
2598   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2599   PetscInt       **rbuf_j=PETSC_NULL;
2600   PetscScalar    **rbuf_a=PETSC_NULL;
2601   Mat_Redundant  *redund=PETSC_NULL;
2602   PetscContainer container;
2603 
2604   PetscFunctionBegin;
2605   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2606   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2607 
2608   if (reuse == MAT_REUSE_MATRIX) {
2609     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2610     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2611     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2612     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2613     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2614     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2615     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2616     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2617 
2618     nsends    = redund->nsends;
2619     nrecvs    = redund->nrecvs;
2620     send_rank = redund->send_rank;
2621     recv_rank = redund->recv_rank;
2622     sbuf_nz   = redund->sbuf_nz;
2623     rbuf_nz   = redund->rbuf_nz;
2624     sbuf_j    = redund->sbuf_j;
2625     sbuf_a    = redund->sbuf_a;
2626     rbuf_j    = redund->rbuf_j;
2627     rbuf_a    = redund->rbuf_a;
2628   }
2629 
2630   if (reuse == MAT_INITIAL_MATRIX) {
2631     PetscMPIInt  subrank,subsize;
2632     PetscInt     nleftover,np_subcomm;
2633     /* get the destination processors' id send_rank, nsends and nrecvs */
2634     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2635     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2636     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2637     np_subcomm = size/nsubcomm;
2638     nleftover  = size - nsubcomm*np_subcomm;
2639     nsends = 0; nrecvs = 0;
2640     for (i=0; i<size; i++) { /* i=rank*/
2641       if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */
2642         send_rank[nsends] = i; nsends++;
2643         recv_rank[nrecvs++] = i;
2644       }
2645     }
2646     if (rank >= size - nleftover) {/* this proc is a leftover processor */
2647       i = size-nleftover-1;
2648       j = 0;
2649       while (j < nsubcomm - nleftover) {
2650         send_rank[nsends++] = i;
2651         i--; j++;
2652       }
2653     }
2654 
2655     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2656       for (i=0; i<nleftover; i++) {
2657         recv_rank[nrecvs++] = size-nleftover+i;
2658       }
2659     }
2660 
2661     /* allocate sbuf_j, sbuf_a */
2662     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2663     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2664     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2665   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2666 
2667   /* copy mat's local entries into the buffers */
2668   if (reuse == MAT_INITIAL_MATRIX) {
2669     rownz_max = 0;
2670     rptr = sbuf_j;
2671     cols = sbuf_j + rend-rstart + 1;
2672     vals = sbuf_a;
2673     rptr[0] = 0;
2674     for (i=0; i<rend-rstart; i++) {
2675       row = i + rstart;
2676       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2677       ncols  = nzA + nzB;
2678       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2679       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2680       /* load the column indices for this row into cols */
2681       lwrite = 0;
2682       for (l=0; l<nzB; l++) {
2683         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2684           vals[lwrite]   = aworkB[l];
2685           cols[lwrite++] = ctmp;
2686         }
2687       }
2688       for (l=0; l<nzA; l++) {
2689         vals[lwrite]   = aworkA[l];
2690         cols[lwrite++] = cstart + cworkA[l];
2691       }
2692       for (l=0; l<nzB; l++) {
2693         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2694           vals[lwrite]   = aworkB[l];
2695           cols[lwrite++] = ctmp;
2696         }
2697       }
2698       vals += ncols;
2699       cols += ncols;
2700       rptr[i+1] = rptr[i] + ncols;
2701       if (rownz_max < ncols) rownz_max = ncols;
2702     }
2703     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2704   } else { /* only copy matrix values into sbuf_a */
2705     rptr = sbuf_j;
2706     vals = sbuf_a;
2707     rptr[0] = 0;
2708     for (i=0; i<rend-rstart; i++) {
2709       row = i + rstart;
2710       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2711       ncols  = nzA + nzB;
2712       cworkB = b->j + b->i[i];
2713       aworkA = a->a + a->i[i];
2714       aworkB = b->a + b->i[i];
2715       lwrite = 0;
2716       for (l=0; l<nzB; l++) {
2717         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2718       }
2719       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2720       for (l=0; l<nzB; l++) {
2721         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2722       }
2723       vals += ncols;
2724       rptr[i+1] = rptr[i] + ncols;
2725     }
2726   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2727 
2728   /* send nzlocal to others, and recv other's nzlocal */
2729   /*--------------------------------------------------*/
2730   if (reuse == MAT_INITIAL_MATRIX) {
2731     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2732     s_waits2 = s_waits3 + nsends;
2733     s_waits1 = s_waits2 + nsends;
2734     r_waits1 = s_waits1 + nsends;
2735     r_waits2 = r_waits1 + nrecvs;
2736     r_waits3 = r_waits2 + nrecvs;
2737   } else {
2738     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2739     r_waits3 = s_waits3 + nsends;
2740   }
2741 
2742   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2743   if (reuse == MAT_INITIAL_MATRIX) {
2744     /* get new tags to keep the communication clean */
2745     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2746     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2747     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2748 
2749     /* post receives of other's nzlocal */
2750     for (i=0; i<nrecvs; i++) {
2751       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2752     }
2753     /* send nzlocal to others */
2754     for (i=0; i<nsends; i++) {
2755       sbuf_nz[i] = nzlocal;
2756       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2757     }
2758     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2759     count = nrecvs;
2760     while (count) {
2761       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2762       recv_rank[imdex] = recv_status.MPI_SOURCE;
2763       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2764       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2765 
2766       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2767       rbuf_nz[imdex] += i + 2;
2768       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2769       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2770       count--;
2771     }
2772     /* wait on sends of nzlocal */
2773     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2774     /* send mat->i,j to others, and receive theirs */
2775     /*------------------------------------------------*/
2776     for (i=0; i<nsends; i++) {
2777       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2778       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2779     }
2780     /* wait on receives of mat->i,j */
2781     /*------------------------------*/
2782     count = nrecvs;
2783     while (count) {
2784       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2785       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2786       count--;
2787     }
2788     /* wait on sends of mat->i,j */
2789     /*---------------------------*/
2790     if (nsends) {
2791       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2792     }
2793   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2794 
2795   /* post receives, send and receive mat->a */
2796   /*----------------------------------------*/
2797   for (imdex=0; imdex<nrecvs; imdex++) {
2798     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2799   }
2800   for (i=0; i<nsends; i++) {
2801     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2802   }
2803   count = nrecvs;
2804   while (count) {
2805     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2806     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2807     count--;
2808   }
2809   if (nsends) {
2810     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2811   }
2812 
2813   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2814 
2815   /* create redundant matrix */
2816   /*-------------------------*/
2817   if (reuse == MAT_INITIAL_MATRIX) {
2818     /* compute rownz_max for preallocation */
2819     for (imdex=0; imdex<nrecvs; imdex++) {
2820       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2821       rptr = rbuf_j[imdex];
2822       for (i=0; i<j; i++) {
2823         ncols = rptr[i+1] - rptr[i];
2824         if (rownz_max < ncols) rownz_max = ncols;
2825       }
2826     }
2827 
2828     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2829     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2830     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2831     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2832     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2833     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2834   } else {
2835     C = *matredundant;
2836   }
2837 
2838   /* insert local matrix entries */
2839   rptr = sbuf_j;
2840   cols = sbuf_j + rend-rstart + 1;
2841   vals = sbuf_a;
2842   for (i=0; i<rend-rstart; i++) {
2843     row   = i + rstart;
2844     ncols = rptr[i+1] - rptr[i];
2845     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2846     vals += ncols;
2847     cols += ncols;
2848   }
2849   /* insert received matrix entries */
2850   for (imdex=0; imdex<nrecvs; imdex++) {
2851     rstart = rowrange[recv_rank[imdex]];
2852     rend   = rowrange[recv_rank[imdex]+1];
2853     rptr = rbuf_j[imdex];
2854     cols = rbuf_j[imdex] + rend-rstart + 1;
2855     vals = rbuf_a[imdex];
2856     for (i=0; i<rend-rstart; i++) {
2857       row   = i + rstart;
2858       ncols = rptr[i+1] - rptr[i];
2859       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2860       vals += ncols;
2861       cols += ncols;
2862     }
2863   }
2864   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2865   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2866   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2867   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2868   if (reuse == MAT_INITIAL_MATRIX) {
2869     PetscContainer container;
2870     *matredundant = C;
2871     /* create a supporting struct and attach it to C for reuse */
2872     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2873     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2874     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2875     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2876     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2877     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2878 
2879     redund->nzlocal = nzlocal;
2880     redund->nsends  = nsends;
2881     redund->nrecvs  = nrecvs;
2882     redund->send_rank = send_rank;
2883     redund->recv_rank = recv_rank;
2884     redund->sbuf_nz = sbuf_nz;
2885     redund->rbuf_nz = rbuf_nz;
2886     redund->sbuf_j  = sbuf_j;
2887     redund->sbuf_a  = sbuf_a;
2888     redund->rbuf_j  = rbuf_j;
2889     redund->rbuf_a  = rbuf_a;
2890 
2891     redund->Destroy = C->ops->destroy;
2892     C->ops->destroy = MatDestroy_MatRedundant;
2893   }
2894   PetscFunctionReturn(0);
2895 }
2896 
2897 #undef __FUNCT__
2898 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2899 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2900 {
2901   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2902   PetscErrorCode ierr;
2903   PetscInt       i,*idxb = 0;
2904   PetscScalar    *va,*vb;
2905   Vec            vtmp;
2906 
2907   PetscFunctionBegin;
2908   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2909   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2910   if (idx) {
2911     for (i=0; i<A->rmap->n; i++) {
2912       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2913     }
2914   }
2915 
2916   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2917   if (idx) {
2918     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2919   }
2920   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2921   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2922 
2923   for (i=0; i<A->rmap->n; i++) {
2924     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2925       va[i] = vb[i];
2926       if (idx) idx[i] = a->garray[idxb[i]];
2927     }
2928   }
2929 
2930   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2931   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2932   ierr = PetscFree(idxb);CHKERRQ(ierr);
2933   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2934   PetscFunctionReturn(0);
2935 }
2936 
2937 #undef __FUNCT__
2938 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2939 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2940 {
2941   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2942   PetscErrorCode ierr;
2943   PetscInt       i,*idxb = 0;
2944   PetscScalar    *va,*vb;
2945   Vec            vtmp;
2946 
2947   PetscFunctionBegin;
2948   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2949   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2950   if (idx) {
2951     for (i=0; i<A->rmap->n; i++) {
2952       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2953     }
2954   }
2955 
2956   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2957   if (idx) {
2958     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2959   }
2960   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2961   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2962 
2963   for (i=0; i<A->rmap->n; i++) {
2964     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2965       va[i] = vb[i];
2966       if (idx) idx[i] = a->garray[idxb[i]];
2967     }
2968   }
2969 
2970   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2971   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2972   ierr = PetscFree(idxb);CHKERRQ(ierr);
2973   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2974   PetscFunctionReturn(0);
2975 }
2976 
2977 #undef __FUNCT__
2978 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2979 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2980 {
2981   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2982   PetscInt       n      = A->rmap->n;
2983   PetscInt       cstart = A->cmap->rstart;
2984   PetscInt      *cmap   = mat->garray;
2985   PetscInt      *diagIdx, *offdiagIdx;
2986   Vec            diagV, offdiagV;
2987   PetscScalar   *a, *diagA, *offdiagA;
2988   PetscInt       r;
2989   PetscErrorCode ierr;
2990 
2991   PetscFunctionBegin;
2992   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2993   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2994   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2995   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2996   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2997   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2998   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2999   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3000   for (r = 0; r < n; ++r) {
3001     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3002       a[r]   = diagA[r];
3003       idx[r] = cstart + diagIdx[r];
3004     } else {
3005       a[r]   = offdiagA[r];
3006       idx[r] = cmap[offdiagIdx[r]];
3007     }
3008   }
3009   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3010   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3011   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3012   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3013   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3014   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3015   PetscFunctionReturn(0);
3016 }
3017 
3018 #undef __FUNCT__
3019 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3020 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3021 {
3022   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
3023   PetscInt       n      = A->rmap->n;
3024   PetscInt       cstart = A->cmap->rstart;
3025   PetscInt      *cmap   = mat->garray;
3026   PetscInt      *diagIdx, *offdiagIdx;
3027   Vec            diagV, offdiagV;
3028   PetscScalar   *a, *diagA, *offdiagA;
3029   PetscInt       r;
3030   PetscErrorCode ierr;
3031 
3032   PetscFunctionBegin;
3033   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3034   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3035   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3036   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3037   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3038   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3039   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3040   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3041   for (r = 0; r < n; ++r) {
3042     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3043       a[r]   = diagA[r];
3044       idx[r] = cstart + diagIdx[r];
3045     } else {
3046       a[r]   = offdiagA[r];
3047       idx[r] = cmap[offdiagIdx[r]];
3048     }
3049   }
3050   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3051   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3052   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3053   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3054   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3055   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3056   PetscFunctionReturn(0);
3057 }
3058 
3059 #undef __FUNCT__
3060 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3061 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3062 {
3063   PetscErrorCode ierr;
3064   Mat            *dummy;
3065 
3066   PetscFunctionBegin;
3067   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3068   *newmat = *dummy;
3069   ierr = PetscFree(dummy);CHKERRQ(ierr);
3070   PetscFunctionReturn(0);
3071 }
3072 
3073 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3074 
3075 #undef __FUNCT__
3076 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3077 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3078 {
3079   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
3080   PetscErrorCode ierr;
3081 
3082   PetscFunctionBegin;
3083   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3084   PetscFunctionReturn(0);
3085 }
3086 
3087 #undef __FUNCT__
3088 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3089 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3090 {
3091   PetscErrorCode ierr;
3092   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3093 
3094   PetscFunctionBegin;
3095   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3096   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3097   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3098   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3099   PetscFunctionReturn(0);
3100 }
3101 
3102 /* -------------------------------------------------------------------*/
3103 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3104        MatGetRow_MPIAIJ,
3105        MatRestoreRow_MPIAIJ,
3106        MatMult_MPIAIJ,
3107 /* 4*/ MatMultAdd_MPIAIJ,
3108        MatMultTranspose_MPIAIJ,
3109        MatMultTransposeAdd_MPIAIJ,
3110 #if defined(PETSC_HAVE_PBGL)
3111        MatSolve_MPIAIJ,
3112 #else
3113        0,
3114 #endif
3115        0,
3116        0,
3117 /*10*/ 0,
3118        0,
3119        0,
3120        MatSOR_MPIAIJ,
3121        MatTranspose_MPIAIJ,
3122 /*15*/ MatGetInfo_MPIAIJ,
3123        MatEqual_MPIAIJ,
3124        MatGetDiagonal_MPIAIJ,
3125        MatDiagonalScale_MPIAIJ,
3126        MatNorm_MPIAIJ,
3127 /*20*/ MatAssemblyBegin_MPIAIJ,
3128        MatAssemblyEnd_MPIAIJ,
3129        MatSetOption_MPIAIJ,
3130        MatZeroEntries_MPIAIJ,
3131 /*24*/ MatZeroRows_MPIAIJ,
3132        0,
3133 #if defined(PETSC_HAVE_PBGL)
3134        0,
3135 #else
3136        0,
3137 #endif
3138        0,
3139        0,
3140 /*29*/ MatSetUp_MPIAIJ,
3141 #if defined(PETSC_HAVE_PBGL)
3142        0,
3143 #else
3144        0,
3145 #endif
3146        0,
3147        0,
3148        0,
3149 /*34*/ MatDuplicate_MPIAIJ,
3150        0,
3151        0,
3152        0,
3153        0,
3154 /*39*/ MatAXPY_MPIAIJ,
3155        MatGetSubMatrices_MPIAIJ,
3156        MatIncreaseOverlap_MPIAIJ,
3157        MatGetValues_MPIAIJ,
3158        MatCopy_MPIAIJ,
3159 /*44*/ MatGetRowMax_MPIAIJ,
3160        MatScale_MPIAIJ,
3161        0,
3162        0,
3163        MatZeroRowsColumns_MPIAIJ,
3164 /*49*/ MatSetRandom_MPIAIJ,
3165        0,
3166        0,
3167        0,
3168        0,
3169 /*54*/ MatFDColoringCreate_MPIAIJ,
3170        0,
3171        MatSetUnfactored_MPIAIJ,
3172        MatPermute_MPIAIJ,
3173        0,
3174 /*59*/ MatGetSubMatrix_MPIAIJ,
3175        MatDestroy_MPIAIJ,
3176        MatView_MPIAIJ,
3177        0,
3178        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3179 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3180        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3181        0,
3182        0,
3183        0,
3184 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3185        MatGetRowMinAbs_MPIAIJ,
3186        0,
3187        MatSetColoring_MPIAIJ,
3188        0,
3189        MatSetValuesAdifor_MPIAIJ,
3190 /*75*/ MatFDColoringApply_AIJ,
3191        0,
3192        0,
3193        0,
3194        MatFindZeroDiagonals_MPIAIJ,
3195 /*80*/ 0,
3196        0,
3197        0,
3198 /*83*/ MatLoad_MPIAIJ,
3199        0,
3200        0,
3201        0,
3202        0,
3203        0,
3204 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3205        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3206        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3207        MatPtAP_MPIAIJ_MPIAIJ,
3208        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3209 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3210        0,
3211        0,
3212        0,
3213        0,
3214 /*99*/ 0,
3215        0,
3216        0,
3217        MatConjugate_MPIAIJ,
3218        0,
3219 /*104*/MatSetValuesRow_MPIAIJ,
3220        MatRealPart_MPIAIJ,
3221        MatImaginaryPart_MPIAIJ,
3222        0,
3223        0,
3224 /*109*/0,
3225        MatGetRedundantMatrix_MPIAIJ,
3226        MatGetRowMin_MPIAIJ,
3227        0,
3228        0,
3229 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3230        0,
3231        0,
3232        0,
3233        0,
3234 /*119*/0,
3235        0,
3236        0,
3237        0,
3238        MatGetMultiProcBlock_MPIAIJ,
3239 /*124*/MatFindNonzeroRows_MPIAIJ,
3240        MatGetColumnNorms_MPIAIJ,
3241        MatInvertBlockDiagonal_MPIAIJ,
3242        0,
3243        MatGetSubMatricesParallel_MPIAIJ,
3244 /*129*/0,
3245        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3246        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3247        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3248        0,
3249 /*134*/0,
3250        0,
3251        0,
3252        0,
3253        0
3254 };
3255 
3256 /* ----------------------------------------------------------------------------------------*/
3257 
3258 EXTERN_C_BEGIN
3259 #undef __FUNCT__
3260 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3261 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3262 {
3263   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3264   PetscErrorCode ierr;
3265 
3266   PetscFunctionBegin;
3267   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3268   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3269   PetscFunctionReturn(0);
3270 }
3271 EXTERN_C_END
3272 
3273 EXTERN_C_BEGIN
3274 #undef __FUNCT__
3275 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3276 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3277 {
3278   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3279   PetscErrorCode ierr;
3280 
3281   PetscFunctionBegin;
3282   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3283   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3284   PetscFunctionReturn(0);
3285 }
3286 EXTERN_C_END
3287 
3288 EXTERN_C_BEGIN
3289 #undef __FUNCT__
3290 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3291 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3292 {
3293   Mat_MPIAIJ     *b;
3294   PetscErrorCode ierr;
3295   PetscInt       i;
3296   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3297 
3298   PetscFunctionBegin;
3299   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3300   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3301   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3302   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3303   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3304   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3305 
3306   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3307   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3308   if (d_nnz) {
3309     for (i=0; i<B->rmap->n; i++) {
3310       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3311     }
3312   }
3313   if (o_nnz) {
3314     for (i=0; i<B->rmap->n; i++) {
3315       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3316     }
3317   }
3318   b = (Mat_MPIAIJ*)B->data;
3319 
3320   if (!B->preallocated) {
3321     /* Explicitly create 2 MATSEQAIJ matrices. */
3322     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3323     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3324     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3325     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3326     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3327     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3328     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3329     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3330     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3331     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3332   }
3333 
3334   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3335   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3336   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3337   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3338   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3339   B->preallocated = PETSC_TRUE;
3340   PetscFunctionReturn(0);
3341 }
3342 EXTERN_C_END
3343 
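/*
   A sketch of how a caller typically reaches the routine above through the public
   interface; m, n, M, N and the per-row arrays d_nnz, o_nnz are placeholders supplied by
   the application, not values taken from this file:

     Mat B;
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/
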
3344 #undef __FUNCT__
3345 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3346 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3347 {
3348   Mat            mat;
3349   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3350   PetscErrorCode ierr;
3351 
3352   PetscFunctionBegin;
3353   *newmat       = 0;
3354   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3355   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3356   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3357   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3358   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3359   a    = (Mat_MPIAIJ*)mat->data;
3360 
3361   mat->factortype    = matin->factortype;
3362   mat->rmap->bs      = matin->rmap->bs;
3363   mat->cmap->bs      = matin->cmap->bs;
3364   mat->assembled    = PETSC_TRUE;
3365   mat->insertmode   = NOT_SET_VALUES;
3366   mat->preallocated = PETSC_TRUE;
3367 
3368   a->size           = oldmat->size;
3369   a->rank           = oldmat->rank;
3370   a->donotstash     = oldmat->donotstash;
3371   a->roworiented    = oldmat->roworiented;
3372   a->rowindices     = 0;
3373   a->rowvalues      = 0;
3374   a->getrowactive   = PETSC_FALSE;
3375 
3376   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3377   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3378 
3379   if (oldmat->colmap) {
3380 #if defined (PETSC_USE_CTABLE)
3381     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3382 #else
3383     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3384     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3385     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3386 #endif
3387   } else a->colmap = 0;
3388   if (oldmat->garray) {
3389     PetscInt len;
3390     len  = oldmat->B->cmap->n;
3391     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3392     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3393     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3394   } else a->garray = 0;
3395 
3396   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3397   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3398   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3399   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3400   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3401   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3402   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3403   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3404   ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3405   *newmat = mat;
3406   PetscFunctionReturn(0);
3407 }
3408 
3409 
3410 
3411 #undef __FUNCT__
3412 #define __FUNCT__ "MatLoad_MPIAIJ"
3413 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3414 {
3415   PetscScalar    *vals,*svals;
3416   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3417   PetscErrorCode ierr;
3418   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3419   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3420   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3421   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3422   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3423   int            fd;
3424   PetscInt       bs = 1;
3425 
3426   PetscFunctionBegin;
3427   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3428   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3429   if (!rank) {
3430     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3431     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3432     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3433   }
3434   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3435   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3436   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3437   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3438 
3439   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3440 
3441   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3442   M = header[1]; N = header[2];
3443   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3444   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3445   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3446 
3447   /* If global sizes are set, check if they are consistent with that given in the file */
3448   if (sizesset) {
3449     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3450   }
3451   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
3452   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3453 
3454   /* determine ownership of all (block) rows */
3455   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3456   if (newMat->rmap->n < 0) m    = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3457   else m = newMat->rmap->n; /* Set by user */
3458 
3459   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3460   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3461 
3462   /* First process needs enough room for process with most rows */
3463   if (!rank) {
3464     mmax = rowners[1];
3465     for (i=2; i<=size; i++) {
3466       mmax = PetscMax(mmax, rowners[i]);
3467     }
3468   } else mmax = m;
3469 
3470   rowners[0] = 0;
3471   for (i=2; i<=size; i++) {
3472     rowners[i] += rowners[i-1];
3473   }
3474   rstart = rowners[rank];
3475   rend   = rowners[rank+1];
3476 
3477   /* distribute row lengths to all processors */
3478   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3479   if (!rank) {
3480     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3481     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3482     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3483     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3484     for (j=0; j<m; j++) {
3485       procsnz[0] += ourlens[j];
3486     }
3487     for (i=1; i<size; i++) {
3488       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3489       /* calculate the number of nonzeros on each processor */
3490       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3491         procsnz[i] += rowlengths[j];
3492       }
3493       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3494     }
3495     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3496   } else {
3497     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3498   }
3499 
3500   if (!rank) {
3501     /* determine max buffer needed and allocate it */
3502     maxnz = 0;
3503     for (i=0; i<size; i++) {
3504       maxnz = PetscMax(maxnz,procsnz[i]);
3505     }
3506     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3507 
3508     /* read in my part of the matrix column indices  */
3509     nz   = procsnz[0];
3510     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3511     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3512 
3513     /* read in everyone else's and ship off */
3514     for (i=1; i<size; i++) {
3515       nz     = procsnz[i];
3516       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3517       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3518     }
3519     ierr = PetscFree(cols);CHKERRQ(ierr);
3520   } else {
3521     /* determine buffer space needed for message */
3522     nz = 0;
3523     for (i=0; i<m; i++) {
3524       nz += ourlens[i];
3525     }
3526     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3527 
3528     /* receive message of column indices*/
3529     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3530   }
3531 
3532   /* determine column ownership if matrix is not square */
3533   if (N != M) {
3534     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3535     else n = newMat->cmap->n;
3536     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3537     cstart = cend - n;
3538   } else {
3539     cstart = rstart;
3540     cend   = rend;
3541     n      = cend - cstart;
3542   }
3543 
3544   /* loop over local rows, determining number of off diagonal entries */
3545   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3546   jj = 0;
3547   for (i=0; i<m; i++) {
3548     for (j=0; j<ourlens[i]; j++) {
3549       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3550       jj++;
3551     }
3552   }
3553 
3554   for (i=0; i<m; i++) {
3555     ourlens[i] -= offlens[i];
3556   }
3557   if (!sizesset) {
3558     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3559   }
3560 
3561   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3562 
3563   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3564 
3565   for (i=0; i<m; i++) {
3566     ourlens[i] += offlens[i];
3567   }
3568 
3569   if (!rank) {
3570     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3571 
3572     /* read in my part of the matrix numerical values  */
3573     nz   = procsnz[0];
3574     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3575 
3576     /* insert into matrix */
3577     jj      = rstart;
3578     smycols = mycols;
3579     svals   = vals;
3580     for (i=0; i<m; i++) {
3581       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3582       smycols += ourlens[i];
3583       svals   += ourlens[i];
3584       jj++;
3585     }
3586 
3587     /* read in other processors and ship out */
3588     for (i=1; i<size; i++) {
3589       nz     = procsnz[i];
3590       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3591       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3592     }
3593     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3594   } else {
3595     /* receive numeric values */
3596     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3597 
3598     /* receive message of values*/
3599     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3600 
3601     /* insert into matrix */
3602     jj      = rstart;
3603     smycols = mycols;
3604     svals   = vals;
3605     for (i=0; i<m; i++) {
3606       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3607       smycols += ourlens[i];
3608       svals   += ourlens[i];
3609       jj++;
3610     }
3611   }
3612   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3613   ierr = PetscFree(vals);CHKERRQ(ierr);
3614   ierr = PetscFree(mycols);CHKERRQ(ierr);
3615   ierr = PetscFree(rowners);CHKERRQ(ierr);
3616   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3617   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3618 
3619   PetscFunctionReturn(0);
3620 }
3621 
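/*
   A minimal sketch of driving the loader above through the public interface; the file
   name "matrix.dat" is a placeholder:

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/
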
3622 #undef __FUNCT__
3623 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3624 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3625 {
3626   PetscErrorCode ierr;
3627   IS             iscol_local;
3628   PetscInt       csize;
3629 
3630   PetscFunctionBegin;
3631   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3632   if (call == MAT_REUSE_MATRIX) {
3633     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3634     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3635   } else {
3636     PetscInt cbs;
3637     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3638     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3639     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3640   }
3641   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3642   if (call == MAT_INITIAL_MATRIX) {
3643     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3644     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3645   }
3646   PetscFunctionReturn(0);
3647 }
3648 
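/*
   A sketch of how the routine above is reached through the public interface; the index
   sets below simply request every locally owned row and column of mat and are purely
   illustrative:

     IS       isrow,iscol;
     Mat      sub;
     PetscInt rstart,rend,cstart,cend;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
     ierr = ISCreateStride(((PetscObject)mat)->comm,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(((PetscObject)mat)->comm,cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
     ierr = MatDestroy(&sub);CHKERRQ(ierr);
*/
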
3649 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3650 #undef __FUNCT__
3651 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3652 /*
3653     Not great since it makes two copies of the submatrix: first a SeqAIJ on each
3654   process, and then the final result by concatenating the local matrices.
3655   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3656 
3657   Note: This requires a sequential iscol with all indices.
3658 */
3659 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3660 {
3661   PetscErrorCode ierr;
3662   PetscMPIInt    rank,size;
3663   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3664   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3665   PetscBool      allcolumns, colflag;
3666   Mat            M,Mreuse;
3667   MatScalar      *vwork,*aa;
3668   MPI_Comm       comm = ((PetscObject)mat)->comm;
3669   Mat_SeqAIJ     *aij;
3670 
3671 
3672   PetscFunctionBegin;
3673   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3674   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3675 
3676   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3677   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3678   if (colflag && ncol == mat->cmap->N) {
3679     allcolumns = PETSC_TRUE;
3680   } else {
3681     allcolumns = PETSC_FALSE;
3682   }
3683   if (call ==  MAT_REUSE_MATRIX) {
3684     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3685     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3686     ierr  = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3687   } else {
3688     ierr   = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3689   }
3690 
3691   /*
3692       m - number of local rows
3693       n - number of columns (same on all processors)
3694       rstart - first row in new global matrix generated
3695   */
3696   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3697   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3698   if (call == MAT_INITIAL_MATRIX) {
3699     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3700     ii  = aij->i;
3701     jj  = aij->j;
3702 
3703     /*
3704         Determine the number of non-zeros in the diagonal and off-diagonal
3705         portions of the matrix in order to do correct preallocation
3706     */
3707 
3708     /* first get start and end of "diagonal" columns */
3709     if (csize == PETSC_DECIDE) {
3710       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3711       if (mglobal == n) { /* square matrix */
3712         nlocal = m;
3713       } else {
3714         nlocal = n/size + ((n % size) > rank);
3715       }
3716     } else {
3717       nlocal = csize;
3718     }
3719     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3720     rstart = rend - nlocal;
3721     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3722 
3723     /* next, compute all the lengths */
3724     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3725     olens = dlens + m;
3726     for (i=0; i<m; i++) {
3727       jend = ii[i+1] - ii[i];
3728       olen = 0;
3729       dlen = 0;
3730       for (j=0; j<jend; j++) {
3731         if (*jj < rstart || *jj >= rend) olen++;
3732         else dlen++;
3733         jj++;
3734       }
3735       olens[i] = olen;
3736       dlens[i] = dlen;
3737     }
3738     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3739     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3740     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3741     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3742     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3743     ierr = PetscFree(dlens);CHKERRQ(ierr);
3744   } else {
3745     PetscInt ml,nl;
3746 
3747     M = *newmat;
3748     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3749     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3750     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3751     /*
3752          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3753        rather than the slower MatSetValues().
3754     */
3755     M->was_assembled = PETSC_TRUE;
3756     M->assembled     = PETSC_FALSE;
3757   }
3758   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3759   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3760   ii  = aij->i;
3761   jj  = aij->j;
3762   aa  = aij->a;
3763   for (i=0; i<m; i++) {
3764     row   = rstart + i;
3765     nz    = ii[i+1] - ii[i];
3766     cwork = jj;     jj += nz;
3767     vwork = aa;     aa += nz;
3768     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3769   }
3770 
3771   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3772   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3773   *newmat = M;
3774 
3775   /* save submatrix used in processor for next request */
3776   if (call ==  MAT_INITIAL_MATRIX) {
3777     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3778     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3779   }
3780 
3781   PetscFunctionReturn(0);
3782 }
3783 
3784 EXTERN_C_BEGIN
3785 #undef __FUNCT__
3786 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3787 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3788 {
3789   PetscInt       m,cstart, cend,j,nnz,i,d;
3790   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3791   const PetscInt *JJ;
3792   PetscScalar    *values;
3793   PetscErrorCode ierr;
3794 
3795   PetscFunctionBegin;
3796   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3797 
3798   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3799   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3800   m      = B->rmap->n;
3801   cstart = B->cmap->rstart;
3802   cend   = B->cmap->rend;
3803   rstart = B->rmap->rstart;
3804 
3805   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3806 
3807 #if defined(PETSC_USE_DEBUG)
3808   for (i=0; i<m; i++) {
3809     nnz     = Ii[i+1]- Ii[i];
3810     JJ      = J + Ii[i];
3811     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3812     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3813     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3814   }
3815 #endif
3816 
3817   for (i=0; i<m; i++) {
3818     nnz     = Ii[i+1]- Ii[i];
3819     JJ      = J + Ii[i];
3820     nnz_max = PetscMax(nnz_max,nnz);
3821     d       = 0;
3822     for (j=0; j<nnz; j++) {
3823       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3824     }
3825     d_nnz[i] = d;
3826     o_nnz[i] = nnz - d;
3827   }
3828   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3829   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3830 
3831   if (v) values = (PetscScalar*)v;
3832   else {
3833     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3834     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3835   }
3836 
3837   for (i=0; i<m; i++) {
3838     ii   = i + rstart;
3839     nnz  = Ii[i+1]- Ii[i];
3840     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3841   }
3842   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3843   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3844 
3845   if (!v) {
3846     ierr = PetscFree(values);CHKERRQ(ierr);
3847   }
3848   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3849   PetscFunctionReturn(0);
3850 }
3851 EXTERN_C_END
3852 
3853 #undef __FUNCT__
3854 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3855 /*@
3856    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3857    (the default parallel PETSc format).
3858 
3859    Collective on MPI_Comm
3860 
3861    Input Parameters:
3862 +  B - the matrix
3863 .  i - the indices into j for the start of each local row (starts with zero)
3864 .  j - the column indices for each local row (starts with zero)
3865 -  v - optional values in the matrix
3866 
3867    Level: developer
3868 
3869    Notes:
3870        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3871      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3872      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3873 
3874        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3875 
3876        The format used for the sparse matrix input is equivalent to a
3877     row-major ordering, i.e. for the following matrix, the input data expected is
3878     as shown:
3879 
3880         1 0 0
3881         2 0 3     P0
3882        -------
3883         4 5 6     P1
3884 
3885      Process0 [P0]: rows_owned=[0,1]
3886         i =  {0,1,3}  [size = nrow+1  = 2+1]
3887         j =  {0,0,2}  [size = local nz = 3]
3888         v =  {1,2,3}  [size = local nz = 3]
3889 
3890      Process1 [P1]: rows_owned=[2]
3891         i =  {0,3}    [size = nrow+1  = 1+1]
3892         j =  {0,1,2}  [size = local nz = 3]
3893         v =  {4,5,6}  [size = local nz = 3]
3894 
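     A sketch of the corresponding calls each process could make for the example above
     (comm and nlocalrows are placeholders; nlocalrows is 2 on P0 and 1 on P1):
.vb
     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,nlocalrows,PETSC_DECIDE,3,3);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
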
3895 .keywords: matrix, aij, compressed row, sparse, parallel
3896 
3897 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3898           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3899 @*/
3900 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3901 {
3902   PetscErrorCode ierr;
3903 
3904   PetscFunctionBegin;
3905   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3906   PetscFunctionReturn(0);
3907 }
3908 
3909 #undef __FUNCT__
3910 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3911 /*@C
3912    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3913    (the default parallel PETSc format).  For good matrix assembly performance
3914    the user should preallocate the matrix storage by setting the parameters
3915    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3916    performance can be increased by more than a factor of 50.
3917 
3918    Collective on MPI_Comm
3919 
3920    Input Parameters:
3921 +  A - the matrix
3922 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3923            (same value is used for all local rows)
3924 .  d_nnz - array containing the number of nonzeros in the various rows of the
3925            DIAGONAL portion of the local submatrix (possibly different for each row)
3926            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3927            The size of this array is equal to the number of local rows, i.e 'm'.
3928            For matrices that will be factored, you must leave room for (and set)
3929            the diagonal entry even if it is zero.
3930 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3931            submatrix (same value is used for all local rows).
3932 -  o_nnz - array containing the number of nonzeros in the various rows of the
3933            OFF-DIAGONAL portion of the local submatrix (possibly different for
3934            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3935            structure. The size of this array is equal to the number
3936            of local rows, i.e 'm'.
3937 
3938    If the *_nnz parameter is given then the *_nz parameter is ignored
3939 
3940    The AIJ format (also called the Yale sparse matrix format or
3941    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3942    storage.  The stored row and column indices begin with zero.
3943    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3944 
3945    The parallel matrix is partitioned such that the first m0 rows belong to
3946    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3947    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
3948 
3949    The DIAGONAL portion of the local submatrix of a processor can be defined
3950    as the submatrix which is obtained by extracting the part corresponding to
3951    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3952    first row that belongs to the processor, r2 is the last row belonging to
3953    this processor, and c1-c2 is the range of indices of the local part of a
3954    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3955    common case of a square matrix, the row and column ranges are the same and
3956    the DIAGONAL part is also square. The remaining portion of the local
3957    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3958 
3959    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3960 
3961    You can call MatGetInfo() to get information on how effective the preallocation was;
3962    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3963    You can also run with the option -info and look for messages with the string
3964    malloc in them to see if additional memory allocation was needed.
3965 
3966    Example usage:
3967 
3968    Consider the following 8x8 matrix with 34 non-zero values that is
3969    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3970    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3971    as follows:
3972 
3973 .vb
3974             1  2  0  |  0  3  0  |  0  4
3975     Proc0   0  5  6  |  7  0  0  |  8  0
3976             9  0 10  | 11  0  0  | 12  0
3977     -------------------------------------
3978            13  0 14  | 15 16 17  |  0  0
3979     Proc1   0 18  0  | 19 20 21  |  0  0
3980             0  0  0  | 22 23  0  | 24  0
3981     -------------------------------------
3982     Proc2  25 26 27  |  0  0 28  | 29  0
3983            30  0  0  | 31 32 33  |  0 34
3984 .ve
3985 
3986    This can be represented as a collection of submatrices as:
3987 
3988 .vb
3989       A B C
3990       D E F
3991       G H I
3992 .ve
3993 
3994    Where the submatrices A,B,C are owned by proc0, D,E,F are
3995    owned by proc1, G,H,I are owned by proc2.
3996 
3997    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3998    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3999    The 'M','N' parameters are 8,8, and have the same values on all procs.
4000 
4001    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4002    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4003    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4004    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4005    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4006    matrix, and [DF] as another SeqAIJ matrix.
4007 
4008    When d_nz, o_nz parameters are specified, d_nz storage elements are
4009    allocated for every row of the local diagonal submatrix, and o_nz
4010    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4011    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4012    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4013    In this case, the values of d_nz,o_nz are:
4014 .vb
4015      proc0 : dnz = 2, o_nz = 2
4016      proc1 : dnz = 3, o_nz = 2
4017      proc2 : dnz = 1, o_nz = 4
4018 .ve
4019    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4020    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4021    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4022    34 values.
4023 
4024    When d_nnz, o_nnz parameters are specified, the storage is specified
4025    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4026    In the above case the values for d_nnz,o_nnz are:
4027 .vb
4028      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4029      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4030      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4031 .ve
4032    Here the space allocated is the sum of all the above values, i.e. 34, and
4033    hence the preallocation is perfect.
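
   As a minimal sketch (assuming a communicator comm spanning the three processes of the
   example above), the calls made on proc1 might look like:
.vb
     Mat      A;
     PetscInt d_nnz[] = {3,3,2};  /* nonzeros per local row of [E]  */
     PetscInt o_nnz[] = {2,1,1};  /* nonzeros per local row of [DF] */
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve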
4034 
4035    Level: intermediate
4036 
4037 .keywords: matrix, aij, compressed row, sparse, parallel
4038 
4039 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4040           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4041 @*/
4042 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4043 {
4044   PetscErrorCode ierr;
4045 
4046   PetscFunctionBegin;
4047   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4048   PetscValidType(B,1);
4049   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4050   PetscFunctionReturn(0);
4051 }
4052 
4053 #undef __FUNCT__
4054 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4055 /*@
4056      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
4057          in standard CSR format.
4058 
4059    Collective on MPI_Comm
4060 
4061    Input Parameters:
4062 +  comm - MPI communicator
4063 .  m - number of local rows (Cannot be PETSC_DECIDE)
4064 .  n - This value should be the same as the local size used in creating the
4065        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4066        calculated if N is given). For square matrices n is almost always m.
4067 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4068 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4069 .   i - row indices
4070 .   j - column indices
4071 -   a - matrix values
4072 
4073    Output Parameter:
4074 .   mat - the matrix
4075 
4076    Level: intermediate
4077 
4078    Notes:
4079        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4080      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4081      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4082 
4083        The i and j indices are 0 based, and the entries of i are offsets into the local j (and a) arrays.
4084 
4085        The format which is used for the sparse matrix input is equivalent to a
4086     row-major ordering, i.e. for the following matrix, the input data expected is
4087     as shown:
4088 
4089         1 0 0
4090         2 0 3     P0
4091        -------
4092         4 5 6     P1
4093 
4094      Process0 [P0]: rows_owned=[0,1]
4095         i =  {0,1,3}  [size = nrow+1  = 2+1]
4096        j =  {0,0,2}  [size = nz = 3]
4097        v =  {1,2,3}  [size = nz = 3]
4098 
4099      Process1 [P1]: rows_owned=[2]
4100         i =  {0,3}    [size = nrow+1  = 1+1]
4101        j =  {0,1,2}  [size = nz = 3]
4102        v =  {4,5,6}  [size = nz = 3]
4103 
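   A minimal sketch (assuming a two-process communicator comm) of the call made on P0
   for the example above; P1 would make the same call with its own arrays and m = 1:
.vb
     Mat         mat;
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&mat);CHKERRQ(ierr);
.ve
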
4104 .keywords: matrix, aij, compressed row, sparse, parallel
4105 
4106 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4107           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4108 @*/
4109 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4110 {
4111   PetscErrorCode ierr;
4112 
4113   PetscFunctionBegin;
4114   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4115   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4116   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4117   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4118   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4119   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4120   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4121   PetscFunctionReturn(0);
4122 }
4123 
4124 #undef __FUNCT__
4125 #define __FUNCT__ "MatCreateAIJ"
4126 /*@C
4127    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4128    (the default parallel PETSc format).  For good matrix assembly performance
4129    the user should preallocate the matrix storage by setting the parameters
4130    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4131    performance can be increased by more than a factor of 50.
4132 
4133    Collective on MPI_Comm
4134 
4135    Input Parameters:
4136 +  comm - MPI communicator
4137 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4138            This value should be the same as the local size used in creating the
4139            y vector for the matrix-vector product y = Ax.
4140 .  n - This value should be the same as the local size used in creating the
4141        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4142        calculated if N is given). For square matrices n is almost always m.
4143 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4144 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4145 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4146            (same value is used for all local rows)
4147 .  d_nnz - array containing the number of nonzeros in the various rows of the
4148            DIAGONAL portion of the local submatrix (possibly different for each row)
4149            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4150            The size of this array is equal to the number of local rows, i.e 'm'.
4151 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4152            submatrix (same value is used for all local rows).
4153 -  o_nnz - array containing the number of nonzeros in the various rows of the
4154            OFF-DIAGONAL portion of the local submatrix (possibly different for
4155            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4156            structure. The size of this array is equal to the number
4157            of local rows, i.e 'm'.
4158 
4159    Output Parameter:
4160 .  A - the matrix
4161 
4162    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4163    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4164    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4165 
4166    Notes:
4167    If the *_nnz parameter is given then the *_nz parameter is ignored
4168 
4169    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4170    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4171    storage requirements for this matrix.
4172 
4173    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4174    processor then it must be used on all processors that share the object for
4175    that argument.
4176 
4177    The user MUST specify either the local or global matrix dimensions
4178    (possibly both).
4179 
4180    The parallel matrix is partitioned across processors such that the
4181    first m0 rows belong to process 0, the next m1 rows belong to
4182    process 1, the next m2 rows belong to process 2, etc., where
4183    m0,m1,m2,... are given by the input parameter 'm', i.e. each processor stores
4184    values corresponding to an [m x N] submatrix.
4185 
4186    The columns are logically partitioned with the n0 columns belonging
4187    to the 0th partition, the next n1 columns belonging to the next
4188    partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.
4189 
4190    The DIAGONAL portion of the local submatrix on any given processor
4191    is the submatrix corresponding to the rows and columns m,n
4192    corresponding to the given processor, i.e. the diagonal submatrix on
4193    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4194    etc. The remaining portion of the local submatrix [m x (N-n)]
4195    constitutes the OFF-DIAGONAL portion. The example below better
4196    illustrates this concept.
4197 
4198    For a square global matrix we define each processor's diagonal portion
4199    to be its local rows and the corresponding columns (a square submatrix);
4200    each processor's off-diagonal portion encompasses the remainder of the
4201    local matrix (a rectangular submatrix).
4202 
4203    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4204 
4205    When calling this routine with a single process communicator, a matrix of
4206    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4207    type of communicator, use the construction mechanism:
4208      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4209 
4210    By default, this format uses inodes (identical nodes) when possible.
4211    We search for consecutive rows with the same nonzero structure, thereby
4212    reusing matrix information to achieve increased efficiency.
4213 
4214    Options Database Keys:
4215 +  -mat_no_inode  - Do not use inodes
4216 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4217 -  -mat_aij_oneindex - Internally use indexing starting at 1
4218         rather than 0.  Note that when calling MatSetValues(),
4219         the user still MUST index entries starting at 0!
4220 
4221 
4222    Example usage:
4223 
4224    Consider the following 8x8 matrix with 34 non-zero values that is
4225    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4226    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4227    as follows:
4228 
4229 .vb
4230             1  2  0  |  0  3  0  |  0  4
4231     Proc0   0  5  6  |  7  0  0  |  8  0
4232             9  0 10  | 11  0  0  | 12  0
4233     -------------------------------------
4234            13  0 14  | 15 16 17  |  0  0
4235     Proc1   0 18  0  | 19 20 21  |  0  0
4236             0  0  0  | 22 23  0  | 24  0
4237     -------------------------------------
4238     Proc2  25 26 27  |  0  0 28  | 29  0
4239            30  0  0  | 31 32 33  |  0 34
4240 .ve
4241 
4242    This can be represented as a collection of submatrices as:
4243 
4244 .vb
4245       A B C
4246       D E F
4247       G H I
4248 .ve
4249 
4250    Where the submatrices A,B,C are owned by proc0, D,E,F are
4251    owned by proc1, G,H,I are owned by proc2.
4252 
4253    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4254    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4255    The 'M','N' parameters are 8,8, and have the same values on all procs.
4256 
4257    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4258    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4259    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4260    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4261    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4262    matrix, and [DF] as another SeqAIJ matrix.
4263 
4264    When d_nz, o_nz parameters are specified, d_nz storage elements are
4265    allocated for every row of the local diagonal submatrix, and o_nz
4266    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4267    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4268    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4269    In this case, the values of d_nz,o_nz are:
4270 .vb
4271      proc0 : dnz = 2, o_nz = 2
4272      proc1 : dnz = 3, o_nz = 2
4273      proc2 : dnz = 1, o_nz = 4
4274 .ve
4275    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4276    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4277    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4278    34 values.
4279 
4280    When d_nnz, o_nnz parameters are specified, the storage is specified
4281    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4282    In the above case the values for d_nnz,o_nnz are:
4283 .vb
4284      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4285      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4286      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4287 .ve
4288    Here the space allocated is the sum of all the above values, i.e. 34, and
4289    hence the preallocation is perfect.
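
   As a minimal sketch (assuming a communicator comm spanning the three processes of the
   example above), the call made on proc1 might look like:
.vb
     Mat      A;
     PetscInt d_nnz[] = {3,3,2};
     PetscInt o_nnz[] = {2,1,1};
     ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve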
4290 
4291    Level: intermediate
4292 
4293 .keywords: matrix, aij, compressed row, sparse, parallel
4294 
4295 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4296           MPIAIJ, MatCreateMPIAIJWithArrays()
4297 @*/
4298 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4299 {
4300   PetscErrorCode ierr;
4301   PetscMPIInt    size;
4302 
4303   PetscFunctionBegin;
4304   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4305   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4306   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4307   if (size > 1) {
4308     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4309     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4310   } else {
4311     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4312     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4313   }
4314   PetscFunctionReturn(0);
4315 }
4316 
4317 #undef __FUNCT__
4318 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4319 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4320 {
4321   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4322 
4323   PetscFunctionBegin;
4324   *Ad     = a->A;
4325   *Ao     = a->B;
4326   *colmap = a->garray;
4327   PetscFunctionReturn(0);
4328 }
4329 
4330 #undef __FUNCT__
4331 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4332 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4333 {
4334   PetscErrorCode ierr;
4335   PetscInt       i;
4336   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4337 
4338   PetscFunctionBegin;
4339   if (coloring->ctype == IS_COLORING_GLOBAL) {
4340     ISColoringValue *allcolors,*colors;
4341     ISColoring      ocoloring;
4342 
4343     /* set coloring for diagonal portion */
4344     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4345 
4346     /* set coloring for off-diagonal portion */
4347     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4348     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4349     for (i=0; i<a->B->cmap->n; i++) {
4350       colors[i] = allcolors[a->garray[i]];
4351     }
4352     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4353     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4354     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4355     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4356   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4357     ISColoringValue *colors;
4358     PetscInt        *larray;
4359     ISColoring      ocoloring;
4360 
4361     /* set coloring for diagonal portion */
4362     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4363     for (i=0; i<a->A->cmap->n; i++) {
4364       larray[i] = i + A->cmap->rstart;
4365     }
4366     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4367     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4368     for (i=0; i<a->A->cmap->n; i++) {
4369       colors[i] = coloring->colors[larray[i]];
4370     }
4371     ierr = PetscFree(larray);CHKERRQ(ierr);
4372     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4373     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4374     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4375 
4376     /* set coloring for off-diagonal portion */
4377     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4378     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4379     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4380     for (i=0; i<a->B->cmap->n; i++) {
4381       colors[i] = coloring->colors[larray[i]];
4382     }
4383     ierr = PetscFree(larray);CHKERRQ(ierr);
4384     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4385     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4386     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4387   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4388 
4389   PetscFunctionReturn(0);
4390 }
4391 
4392 #undef __FUNCT__
4393 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4394 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4395 {
4396   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4397   PetscErrorCode ierr;
4398 
4399   PetscFunctionBegin;
4400   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4401   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4402   PetscFunctionReturn(0);
4403 }
4404 
4405 #undef __FUNCT__
4406 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4407 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4408 {
4409   PetscErrorCode ierr;
4410   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4411   PetscInt       *indx;
4412 
4413   PetscFunctionBegin;
4414   /* This routine will ONLY return MPIAIJ type matrix */
4415   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4416   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4417   if (n == PETSC_DECIDE) {
4418     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4419   }
4420   /* Check sum(n) = N */
4421   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4422   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4423 
4424   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4425   rstart -= m;
4426 
4427   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4428   for (i=0;i<m;i++) {
4429     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4430     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4431     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4432   }
4433 
4434   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4435   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4436   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4437   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4438   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4439   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4440   PetscFunctionReturn(0);
4441 }
4442 
4443 #undef __FUNCT__
4444 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4445 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4446 {
4447   PetscErrorCode ierr;
4448   PetscInt       m,N,i,rstart,nnz,Ii;
4449   PetscInt       *indx;
4450   PetscScalar    *values;
4451 
4452   PetscFunctionBegin;
4453   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4454   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4455   for (i=0;i<m;i++) {
4456     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4457     Ii    = i + rstart;
4458     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4459     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4460   }
4461   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4462   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4463   PetscFunctionReturn(0);
4464 }
4465 
4466 #undef __FUNCT__
4467 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4468 /*@
4469       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4470                  matrices from each processor
4471 
4472     Collective on MPI_Comm
4473 
4474    Input Parameters:
4475 +    comm - the communicator the parallel matrix will live on
4476 .    inmat - the input sequential matrix (one per process)
4477 .    n - number of local columns (or PETSC_DECIDE)
4478 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4479 
4480    Output Parameter:
4481 .    outmat - the parallel matrix generated
4482 
4483     Level: advanced
4484 
4485    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4486 
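   A minimal sketch of a typical call, assuming each process has already assembled its
   sequential matrix inmat:
.vb
     Mat outmat;
     ierr = MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);
.ve
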
4487 @*/
4488 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4489 {
4490   PetscErrorCode ierr;
4491 
4492   PetscFunctionBegin;
4493   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4494   if (scall == MAT_INITIAL_MATRIX) {
4495     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4496   }
4497   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4498   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4499   PetscFunctionReturn(0);
4500 }
4501 
4502 #undef __FUNCT__
4503 #define __FUNCT__ "MatFileSplit"
4504 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4505 {
4506   PetscErrorCode    ierr;
4507   PetscMPIInt       rank;
4508   PetscInt          m,N,i,rstart,nnz;
4509   size_t            len;
4510   const PetscInt    *indx;
4511   PetscViewer       out;
4512   char              *name;
4513   Mat               B;
4514   const PetscScalar *values;
4515 
4516   PetscFunctionBegin;
4517   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4518   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4519   /* Should this be the type of the diagonal block of A? */
4520   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4521   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4522   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4523   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4524   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4525   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4526   for (i=0;i<m;i++) {
4527     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4528     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4529     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4530   }
4531   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4532   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4533 
4534   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4535   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4536   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4537   sprintf(name,"%s.%d",outfile,rank);
4538   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4539   ierr = PetscFree(name);CHKERRQ(ierr);
4540   ierr = MatView(B,out);CHKERRQ(ierr);
4541   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4542   ierr = MatDestroy(&B);CHKERRQ(ierr);
4543   PetscFunctionReturn(0);
4544 }
4545 
4546 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4547 #undef __FUNCT__
4548 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4549 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4550 {
4551   PetscErrorCode       ierr;
4552   Mat_Merge_SeqsToMPI  *merge;
4553   PetscContainer       container;
4554 
4555   PetscFunctionBegin;
4556   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4557   if (container) {
4558     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4559     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4560     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4561     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4562     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4563     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4564     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4565     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4566     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4567     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4568     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4569     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4570     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4571     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4572     ierr = PetscFree(merge);CHKERRQ(ierr);
4573     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4574   }
4575   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4576   PetscFunctionReturn(0);
4577 }
4578 
4579 #include <../src/mat/utils/freespace.h>
4580 #include <petscbt.h>
4581 
4582 #undef __FUNCT__
4583 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4584 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4585 {
4586   PetscErrorCode       ierr;
4587   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4588   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4589   PetscMPIInt          size,rank,taga,*len_s;
4590   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4591   PetscInt             proc,m;
4592   PetscInt             **buf_ri,**buf_rj;
4593   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4594   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4595   MPI_Request          *s_waits,*r_waits;
4596   MPI_Status           *status;
4597   MatScalar            *aa=a->a;
4598   MatScalar            **abuf_r,*ba_i;
4599   Mat_Merge_SeqsToMPI  *merge;
4600   PetscContainer       container;
4601 
4602   PetscFunctionBegin;
4603   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4604 
4605   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4606   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4607 
4608   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4609   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4610 
4611   bi     = merge->bi;
4612   bj     = merge->bj;
4613   buf_ri = merge->buf_ri;
4614   buf_rj = merge->buf_rj;
4615 
4616   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4617   owners = merge->rowmap->range;
4618   len_s  = merge->len_s;
4619 
4620   /* send and recv matrix values */
4621   /*-----------------------------*/
4622   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4623   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4624 
4625   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4626   for (proc=0,k=0; proc<size; proc++) {
4627     if (!len_s[proc]) continue;
4628     i = owners[proc];
4629     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4630     k++;
4631   }
4632 
4633   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4634   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4635   ierr = PetscFree(status);CHKERRQ(ierr);
4636 
4637   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4638   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4639 
4640   /* insert mat values of mpimat */
4641   /*----------------------------*/
4642   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4643   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4644 
4645   for (k=0; k<merge->nrecv; k++) {
4646     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4647     nrows = *(buf_ri_k[k]);
4648     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4649     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
4650   }
4651 
4652   /* set values of ba */
4653   m = merge->rowmap->n;
4654   for (i=0; i<m; i++) {
4655     arow = owners[rank] + i;
4656     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4657     bnzi = bi[i+1] - bi[i];
4658     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4659 
4660     /* add local non-zero vals of this proc's seqmat into ba */
4661     anzi = ai[arow+1] - ai[arow];
4662     aj   = a->j + ai[arow];
4663     aa   = a->a + ai[arow];
4664     nextaj = 0;
4665     for (j=0; nextaj<anzi; j++) {
4666       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4667         ba_i[j] += aa[nextaj++];
4668       }
4669     }
4670 
4671     /* add received vals into ba */
4672     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4673       /* i-th row */
4674       if (i == *nextrow[k]) {
4675         anzi = *(nextai[k]+1) - *nextai[k];
4676         aj   = buf_rj[k] + *(nextai[k]);
4677         aa   = abuf_r[k] + *(nextai[k]);
4678         nextaj = 0;
4679         for (j=0; nextaj<anzi; j++) {
4680           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4681             ba_i[j] += aa[nextaj++];
4682           }
4683         }
4684         nextrow[k]++; nextai[k]++;
4685       }
4686     }
4687     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4688   }
4689   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4690   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4691 
4692   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4693   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4694   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4695   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4696   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4697   PetscFunctionReturn(0);
4698 }
4699 
4700 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4701 
4702 #undef __FUNCT__
4703 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4704 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4705 {
4706   PetscErrorCode       ierr;
4707   Mat                  B_mpi;
4708   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4709   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4710   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4711   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4712   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4713   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4714   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4715   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4716   MPI_Status           *status;
4717   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4718   PetscBT              lnkbt;
4719   Mat_Merge_SeqsToMPI  *merge;
4720   PetscContainer       container;
4721 
4722   PetscFunctionBegin;
4723   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4724 
4725   /* make sure it is a PETSc comm */
4726   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4727   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4728   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4729 
4730   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4731   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4732 
4733   /* determine row ownership */
4734   /*---------------------------------------------------------*/
4735   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4736   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4737   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4738   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4739   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4740   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4741   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4742 
4743   m      = merge->rowmap->n;
4744   owners = merge->rowmap->range;
4745 
4746   /* determine the number of messages to send, their lengths */
4747   /*---------------------------------------------------------*/
4748   len_s  = merge->len_s;
4749 
4750   len = 0;  /* length of buf_si[] */
4751   merge->nsend = 0;
4752   for (proc=0; proc<size; proc++) {
4753     len_si[proc] = 0;
4754     if (proc == rank) {
4755       len_s[proc] = 0;
4756     } else {
4757       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4758       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4759     }
4760     if (len_s[proc]) {
4761       merge->nsend++;
4762       nrows = 0;
4763       for (i=owners[proc]; i<owners[proc+1]; i++) {
4764         if (ai[i+1] > ai[i]) nrows++;
4765       }
4766       len_si[proc] = 2*(nrows+1);
4767       len += len_si[proc];
4768     }
4769   }
4770 
4771   /* determine the number and length of messages to receive for ij-structure */
4772   /*-------------------------------------------------------------------------*/
4773   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4774   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4775 
4776   /* post the Irecv of j-structure */
4777   /*-------------------------------*/
4778   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4779   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4780 
4781   /* post the Isend of j-structure */
4782   /*--------------------------------*/
4783   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4784 
4785   for (proc=0, k=0; proc<size; proc++) {
4786     if (!len_s[proc]) continue;
4787     i = owners[proc];
4788     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4789     k++;
4790   }
4791 
4792   /* receives and sends of j-structure are complete */
4793   /*------------------------------------------------*/
4794   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4795   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4796 
4797   /* send and recv i-structure */
4798   /*---------------------------*/
4799   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4800   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4801 
4802   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4803   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4804   for (proc=0,k=0; proc<size; proc++) {
4805     if (!len_s[proc]) continue;
4806     /* form outgoing message for i-structure:
4807          buf_si[0]:                 nrows to be sent
4808                [1:nrows]:           row index (global)
4809                [nrows+1:2*nrows+1]: i-structure index
4810     */
4811     /*-------------------------------------------*/
4812     nrows = len_si[proc]/2 - 1;
4813     buf_si_i    = buf_si + nrows+1;
4814     buf_si[0]   = nrows;
4815     buf_si_i[0] = 0;
4816     nrows = 0;
4817     for (i=owners[proc]; i<owners[proc+1]; i++) {
4818       anzi = ai[i+1] - ai[i];
4819       if (anzi) {
4820         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4821         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4822         nrows++;
4823       }
4824     }
4825     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4826     k++;
4827     buf_si += len_si[proc];
4828   }
4829 
4830   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4831   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4832 
4833   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4834   for (i=0; i<merge->nrecv; i++) {
4835     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4836   }
4837 
4838   ierr = PetscFree(len_si);CHKERRQ(ierr);
4839   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4840   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4841   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4842   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4843   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4844   ierr = PetscFree(status);CHKERRQ(ierr);
4845 
4846   /* compute a local seq matrix in each processor */
4847   /*----------------------------------------------*/
4848   /* allocate bi array and free space for accumulating nonzero column info */
4849   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4850   bi[0] = 0;
4851 
4852   /* create and initialize a linked list */
4853   nlnk = N+1;
4854   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4855 
4856   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4857   len  = ai[owners[rank+1]] - ai[owners[rank]];
4858   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4859   current_space = free_space;
4860 
4861   /* determine symbolic info for each local row */
4862   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4863 
4864   for (k=0; k<merge->nrecv; k++) {
4865     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4866     nrows = *buf_ri_k[k];
4867     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4868     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
4869   }
4870 
4871   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4872   len = 0;
4873   for (i=0;i<m;i++) {
4874     bnzi   = 0;
4875     /* add local non-zero cols of this proc's seqmat into lnk */
4876     arow   = owners[rank] + i;
4877     anzi   = ai[arow+1] - ai[arow];
4878     aj     = a->j + ai[arow];
4879     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4880     bnzi += nlnk;
4881     /* add received col data into lnk */
4882     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4883       if (i == *nextrow[k]) { /* i-th row */
4884         anzi = *(nextai[k]+1) - *nextai[k];
4885         aj   = buf_rj[k] + *nextai[k];
4886         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4887         bnzi += nlnk;
4888         nextrow[k]++; nextai[k]++;
4889       }
4890     }
4891     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4892 
4893     /* if free space is not available, make more free space */
4894     if (current_space->local_remaining<bnzi) {
4895       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4896       nspacedouble++;
4897     }
4898     /* copy data into free space, then initialize lnk */
4899     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4900     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4901 
4902     current_space->array           += bnzi;
4903     current_space->local_used      += bnzi;
4904     current_space->local_remaining -= bnzi;
4905 
4906     bi[i+1] = bi[i] + bnzi;
4907   }
4908 
4909   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4910 
4911   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4912   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4913   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4914 
4915   /* create symbolic parallel matrix B_mpi */
4916   /*---------------------------------------*/
4917   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4918   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4919   if (n==PETSC_DECIDE) {
4920     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4921   } else {
4922     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4923   }
4924   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4925   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4926   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4927   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4928   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4929 
4930   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4931   B_mpi->assembled     = PETSC_FALSE;
4932   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4933   merge->bi            = bi;
4934   merge->bj            = bj;
4935   merge->buf_ri        = buf_ri;
4936   merge->buf_rj        = buf_rj;
4937   merge->coi           = PETSC_NULL;
4938   merge->coj           = PETSC_NULL;
4939   merge->owners_co     = PETSC_NULL;
4940 
4941   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4942 
4943   /* attach the supporting struct to B_mpi for reuse */
4944   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4945   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4946   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4947   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4948   *mpimat = B_mpi;
4949 
4950   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4951   PetscFunctionReturn(0);
4952 }
4953 
4954 #undef __FUNCT__
4955 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4956 /*@C
4957       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
4958                  matrices from each processor
4959 
4960     Collective on MPI_Comm
4961 
4962    Input Parameters:
4963 +    comm - the communicator the parallel matrix will live on
4964 .    seqmat - the input sequential matrix (one per process)
4965 .    m - number of local rows (or PETSC_DECIDE)
4966 .    n - number of local columns (or PETSC_DECIDE)
4967 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4968 
4969    Output Parameter:
4970 .    mpimat - the parallel matrix generated
4971 
4972     Level: advanced
4973 
4974    Notes:
4975      The dimensions of the sequential matrix in each processor MUST be the same.
4976      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4977      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
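
     A minimal sketch of the typical call pattern (the first call builds the parallel matrix,
     the second reuses its nonzero structure after the values of seqmat have been updated):
.vb
     Mat C;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     /* ... update the numerical values of seqmat, keeping its nonzero pattern ... */
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
.ve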
4978 @*/
4979 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4980 {
4981   PetscErrorCode   ierr;
4982   PetscMPIInt     size;
4983 
4984   PetscFunctionBegin;
4985   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4986   if (size == 1) {
4987      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4988      if (scall == MAT_INITIAL_MATRIX) {
4989        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4990      } else {
4991        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4992      }
4993      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4994      PetscFunctionReturn(0);
4995   }
4996   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4997   if (scall == MAT_INITIAL_MATRIX) {
4998     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4999   }
5000   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5001   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5002   PetscFunctionReturn(0);
5003 }
5004 
5005 #undef __FUNCT__
5006 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5007 /*@
5008      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5009           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5010           with MatGetSize().
5011 
5012     Not Collective
5013 
5014    Input Parameters:
5015 +    A - the matrix
5016 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5017 
5018    Output Parameter:
5019 .    A_loc - the local sequential matrix generated
5020 
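   A minimal usage sketch (assuming the caller destroys A_loc when it is no longer needed):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc as an ordinary sequential matrix ... */
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
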
5021     Level: developer
5022 
5023 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5024 
5025 @*/
5026 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5027 {
5028   PetscErrorCode  ierr;
5029   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
5030   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5031   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5032   MatScalar       *aa=a->a,*ba=b->a,*cam;
5033   PetscScalar     *ca;
5034   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5035   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
5036   PetscBool       match;
5037 
5038   PetscFunctionBegin;
5039   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5040   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5041   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5042   if (scall == MAT_INITIAL_MATRIX) {
5043     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5044     ci[0] = 0;
5045     for (i=0; i<am; i++) {
5046       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5047     }
5048     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5049     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5050     k = 0;
5051     for (i=0; i<am; i++) {
5052       ncols_o = bi[i+1] - bi[i];
5053       ncols_d = ai[i+1] - ai[i];
5054       /* off-diagonal portion of A */
5055       for (jo=0; jo<ncols_o; jo++) {
5056         col = cmap[*bj];
5057         if (col >= cstart) break;
5058         cj[k]   = col; bj++;
5059         ca[k++] = *ba++;
5060       }
5061       /* diagonal portion of A */
5062       for (j=0; j<ncols_d; j++) {
5063         cj[k]   = cstart + *aj++;
5064         ca[k++] = *aa++;
5065       }
5066       /* off-diagonal portion of A */
5067       for (j=jo; j<ncols_o; j++) {
5068         cj[k]   = cmap[*bj++];
5069         ca[k++] = *ba++;
5070       }
5071     }
5072     /* put together the new matrix */
5073     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5074     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5075     /* Since these are PETSc arrays, change flags to free them as necessary. */
5076     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5077     mat->free_a  = PETSC_TRUE;
5078     mat->free_ij = PETSC_TRUE;
5079     mat->nonew   = 0;
5080   } else if (scall == MAT_REUSE_MATRIX) {
5081     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5082     ci = mat->i; cj = mat->j; cam = mat->a;
5083     for (i=0; i<am; i++) {
5084       /* off-diagonal portion of A */
5085       ncols_o = bi[i+1] - bi[i];
5086       for (jo=0; jo<ncols_o; jo++) {
5087         col = cmap[*bj];
5088         if (col >= cstart) break;
5089         *cam++ = *ba++; bj++;
5090       }
5091       /* diagonal portion of A */
5092       ncols_d = ai[i+1] - ai[i];
5093       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5094       /* off-diagonal portion of A */
5095       for (j=jo; j<ncols_o; j++) {
5096         *cam++ = *ba++; bj++;
5097       }
5098     }
5099   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5100   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5101   PetscFunctionReturn(0);
5102 }
5103 
5104 #undef __FUNCT__
5105 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5106 /*@C
5107      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5108 
5109     Not Collective
5110 
5111    Input Parameters:
5112 +    A - the matrix
5113 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5114 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5115 
5116    Output Parameter:
5117 .    A_loc - the local sequential matrix generated
5118 
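   A minimal usage sketch, passing PETSC_NULL for row and col to take all local rows and
   all nonzero columns:
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve
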
5119     Level: developer
5120 
5121 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5122 
5123 @*/
5124 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5125 {
5126   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5127   PetscErrorCode    ierr;
5128   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5129   IS                isrowa,iscola;
5130   Mat               *aloc;
5131   PetscBool       match;
5132 
5133   PetscFunctionBegin;
5134   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5135   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5136   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5137   if (!row) {
5138     start = A->rmap->rstart; end = A->rmap->rend;
5139     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5140   } else {
5141     isrowa = *row;
5142   }
5143   if (!col) {
5144     start = A->cmap->rstart;
5145     cmap  = a->garray;
5146     nzA   = a->A->cmap->n;
5147     nzB   = a->B->cmap->n;
5148     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5149     ncols = 0;
5150     for (i=0; i<nzB; i++) {
5151       if (cmap[i] < start) idx[ncols++] = cmap[i];
5152       else break;
5153     }
5154     imark = i;
5155     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5156     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5157     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5158   } else {
5159     iscola = *col;
5160   }
5161   if (scall != MAT_INITIAL_MATRIX) {
5162     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5163     aloc[0] = *A_loc;
5164   }
5165   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5166   *A_loc = aloc[0];
5167   ierr = PetscFree(aloc);CHKERRQ(ierr);
5168   if (!row) {
5169     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5170   }
5171   if (!col) {
5172     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5173   }
5174   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5175   PetscFunctionReturn(0);
5176 }
5177 
5178 #undef __FUNCT__
5179 #define __FUNCT__ "MatGetBrowsOfAcols"
5180 /*@C
5181     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5182 
5183     Collective on Mat
5184 
5185    Input Parameters:
5186 +    A,B - the matrices in mpiaij format
5187 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5188 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5189 
5190    Output Parameter:
5191 +    rowb, colb - index sets of rows and columns of B to extract
5192 -    B_seq - the sequential matrix generated
5193 
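   A minimal sketch of the initial-then-reuse call pattern (the index sets created by the
   first call are passed back in on reuse):
.vb
     IS  rowb,colb;
     Mat B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     /* ... B receives new numerical values with the same nonzero pattern ... */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
.ve
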
5194     Level: developer
5195 
5196 @*/
5197 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5198 {
5199   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5200   PetscErrorCode    ierr;
5201   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5202   IS                isrowb,iscolb;
5203   Mat               *bseq=PETSC_NULL;
5204 
5205   PetscFunctionBegin;
5206   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5207     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5208   }
5209   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5210 
5211   if (scall == MAT_INITIAL_MATRIX) {
5212     start = A->cmap->rstart;
5213     cmap  = a->garray;
5214     nzA   = a->A->cmap->n;
5215     nzB   = a->B->cmap->n;
5216     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5217     ncols = 0;
5218     for (i=0; i<nzB; i++) {  /* row < local row index */
5219       if (cmap[i] < start) idx[ncols++] = cmap[i];
5220       else break;
5221     }
5222     imark = i;
5223     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5224     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5225     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5226     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5227   } else {
5228     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5229     isrowb = *rowb; iscolb = *colb;
5230     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5231     bseq[0] = *B_seq;
5232   }
5233   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5234   *B_seq = bseq[0];
5235   ierr = PetscFree(bseq);CHKERRQ(ierr);
5236   if (!rowb) {
5237     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5238   } else {
5239     *rowb = isrowb;
5240   }
5241   if (!colb) {
5242     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5243   } else {
5244     *colb = iscolb;
5245   }
5246   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5247   PetscFunctionReturn(0);
5248 }
5249 
5250 #undef __FUNCT__
5251 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5252 /*
5253     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix containing the rows of B that correspond to the nonzero columns
5254     of the OFF-DIAGONAL portion of the local part of A
5255 
5256     Collective on Mat
5257 
5258    Input Parameters:
5259 +    A,B - the matrices in mpiaij format
5260 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5261 
5262    Output Parameters:
5263 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5264 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5265 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5266 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5267 
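   Example usage (a minimal sketch of the intended calling pattern; the caller owns the
   returned buffers, and the matching frees shown at the end are assumptions):

     PetscInt  *startsj_s = PETSC_NULL,*startsj_r = PETSC_NULL;
     MatScalar *bufa = PETSC_NULL;
     Mat        B_oth;

     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     /* ... values of B change, nonzero structure unchanged ... */
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
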
5268     Level: developer
5269 
5270 */
5271 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5272 {
5273   VecScatter_MPI_General *gen_to,*gen_from;
5274   PetscErrorCode         ierr;
5275   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5276   Mat_SeqAIJ             *b_oth;
5277   VecScatter             ctx=a->Mvctx;
5278   MPI_Comm               comm=((PetscObject)ctx)->comm;
5279   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5280   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5281   PetscScalar            *rvalues,*svalues;
5282   MatScalar              *b_otha,*bufa,*bufA;
5283   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5284   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5285   MPI_Status             *sstatus,rstatus;
5286   PetscMPIInt            jj;
5287   PetscInt               *cols,sbs,rbs;
5288   PetscScalar            *vals;
5289 
5290   PetscFunctionBegin;
5291   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5292     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5293   }
5294   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5295   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5296 
5297   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5298   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5299   rvalues  = gen_from->values; /* holds the length of receiving row */
5300   svalues  = gen_to->values;   /* holds the length of sending row */
5301   nrecvs   = gen_from->n;
5302   nsends   = gen_to->n;
5303 
5304   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5305   srow     = gen_to->indices;   /* local row index to be sent */
5306   sstarts  = gen_to->starts;
5307   sprocs   = gen_to->procs;
5308   sstatus  = gen_to->sstatus;
5309   sbs      = gen_to->bs;
5310   rstarts  = gen_from->starts;
5311   rprocs   = gen_from->procs;
5312   rbs      = gen_from->bs;
5313 
5314   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5315   if (scall == MAT_INITIAL_MATRIX) {
5316     /* i-array */
5317     /*---------*/
5318     /*  post receives */
5319     for (i=0; i<nrecvs; i++) {
5320       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5321       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5322       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5323     }
5324 
5325     /* pack the outgoing message */
5326     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5327     sstartsj[0] = 0;  rstartsj[0] = 0;
5328     len = 0; /* total length of j or a array to be sent */
5329     k = 0;
5330     for (i=0; i<nsends; i++) {
5331       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5332       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5333       for (j=0; j<nrows; j++) {
5334         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5335         for (l=0; l<sbs; l++) {
5336           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5337           rowlen[j*sbs+l] = ncols;
5338           len += ncols;
5339           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5340         }
5341         k++;
5342       }
5343       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5344       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5345     }
5346     /* recvs and sends of i-array are completed */
5347     i = nrecvs;
5348     while (i--) {
5349       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5350     }
5351     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5352 
5353     /* allocate buffers for sending j and a arrays */
5354     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5355     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5356 
5357     /* create i-array of B_oth */
5358     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5359     b_othi[0] = 0;
5360     len = 0; /* total length of j or a array to be received */
5361     k = 0;
5362     for (i=0; i<nrecvs; i++) {
5363       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5364       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5365       for (j=0; j<nrows; j++) {
5366         b_othi[k+1] = b_othi[k] + rowlen[j];
5367         len += rowlen[j]; k++;
5368       }
5369       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5370     }
5371 
5372     /* allocate space for j and a arrays of B_oth */
5373     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5374     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5375 
5376     /* j-array */
5377     /*---------*/
5378     /*  post receives of j-array */
5379     for (i=0; i<nrecvs; i++) {
5380       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5381       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5382     }
5383 
5384     /* pack the outgoing message j-array */
5385     k = 0;
5386     for (i=0; i<nsends; i++) {
5387       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5388       bufJ = bufj+sstartsj[i];
5389       for (j=0; j<nrows; j++) {
5390         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5391         for (ll=0; ll<sbs; ll++) {
5392           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5393           for (l=0; l<ncols; l++) {
5394             *bufJ++ = cols[l];
5395           }
5396           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5397         }
5398       }
5399       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5400     }
5401 
5402     /* recvs and sends of j-array are completed */
5403     i = nrecvs;
5404     while (i--) {
5405       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5406     }
5407     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5408   } else if (scall == MAT_REUSE_MATRIX) {
5409     sstartsj = *startsj_s;
5410     rstartsj = *startsj_r;
5411     bufa     = *bufa_ptr;
5412     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5413     b_otha   = b_oth->a;
5414   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Unsupported MatReuse value");
5415 
5416   /* a-array */
5417   /*---------*/
5418   /*  post receives of a-array */
5419   for (i=0; i<nrecvs; i++) {
5420     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5421     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5422   }
5423 
5424   /* pack the outgoing message a-array */
5425   k = 0;
5426   for (i=0; i<nsends; i++) {
5427     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5428     bufA = bufa+sstartsj[i];
5429     for (j=0; j<nrows; j++) {
5430       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5431       for (ll=0; ll<sbs; ll++) {
5432         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5433         for (l=0; l<ncols; l++) {
5434           *bufA++ = vals[l];
5435         }
5436         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5437       }
5438     }
5439     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5440   }
5441   /* recvs and sends of a-array are completed */
5442   i = nrecvs;
5443   while (i--) {
5444     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5445   }
5446   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5447   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5448 
5449   if (scall == MAT_INITIAL_MATRIX) {
5450     /* put together the new matrix */
5451     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5452 
5453     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5454     /* Since these are PETSc arrays, change flags to free them as necessary. */
5455     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5456     b_oth->free_a  = PETSC_TRUE;
5457     b_oth->free_ij = PETSC_TRUE;
5458     b_oth->nonew   = 0;
5459 
5460     ierr = PetscFree(bufj);CHKERRQ(ierr);
5461     if (!startsj_s || !bufa_ptr) {
5462       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5463       ierr = PetscFree(bufa);CHKERRQ(ierr);
5464     } else {
5465       *startsj_s = sstartsj;
5466       *startsj_r = rstartsj;
5467       *bufa_ptr  = bufa;
5468     }
5469   }
5470   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5471   PetscFunctionReturn(0);
5472 }
5473 
5474 #undef __FUNCT__
5475 #define __FUNCT__ "MatGetCommunicationStructs"
5476 /*@C
5477   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5478 
5479   Not Collective
5480 
5481   Input Parameters:
5482 . A - The matrix in mpiaij format
5483 
5484   Output Parameters:
5485 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5486 . colmap - A map from global column index to local index into lvec
5487 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5488 
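  Example usage (a minimal sketch, assuming A is an assembled MPIAIJ matrix; the returned
  objects are owned by the matrix and must not be destroyed by the caller):
.vb
     Vec        lvec;
     VecScatter mvctx;
#if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
#else
     PetscInt   *colmap;
#endif

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&mvctx);CHKERRQ(ierr);
.ve
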
5489   Level: developer
5490 
5491 @*/
5492 #if defined (PETSC_USE_CTABLE)
5493 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5494 #else
5495 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5496 #endif
5497 {
5498   Mat_MPIAIJ *a;
5499 
5500   PetscFunctionBegin;
5501   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5502   PetscValidPointer(lvec, 2);
5503   PetscValidPointer(colmap, 3);
5504   PetscValidPointer(multScatter, 4);
5505   a = (Mat_MPIAIJ *) A->data;
5506   if (lvec) *lvec = a->lvec;
5507   if (colmap) *colmap = a->colmap;
5508   if (multScatter) *multScatter = a->Mvctx;
5509   PetscFunctionReturn(0);
5510 }
5511 
5512 EXTERN_C_BEGIN
5513 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5514 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5515 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5516 EXTERN_C_END
5517 
5518 #undef __FUNCT__
5519 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5520 /*
5521     Computes (B'*A')' since computing A*B directly is untenable
5522 
5523                n                       p                          p
5524         (              )       (              )         (                  )
5525       m (      A       )  *  n (       B      )   =   m (         C        )
5526         (              )       (              )         (                  )
5527 
5528 */
5529 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5530 {
5531   PetscErrorCode     ierr;
5532   Mat                At,Bt,Ct;
5533 
5534   PetscFunctionBegin;
5535   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5536   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5537   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5538   ierr = MatDestroy(&At);CHKERRQ(ierr);
5539   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5540   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5541   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5542   PetscFunctionReturn(0);
5543 }
5544 
5545 #undef __FUNCT__
5546 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5547 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5548 {
5549   PetscErrorCode ierr;
5550   PetscInt       m=A->rmap->n,n=B->cmap->n;
5551   Mat            Cmat;
5552 
5553   PetscFunctionBegin;
5554   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5555   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5556   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5557   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5558   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5559   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5560   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5561   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5562 
5563   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5564   *C = Cmat;
5565   PetscFunctionReturn(0);
5566 }
5567 
5568 /* ----------------------------------------------------------------*/
5569 #undef __FUNCT__
5570 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5571 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5572 {
5573   PetscErrorCode ierr;
5574 
5575   PetscFunctionBegin;
5576   if (scall == MAT_INITIAL_MATRIX) {
5577     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5578   }
5579   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5580   PetscFunctionReturn(0);
5581 }
5582 
5583 EXTERN_C_BEGIN
5584 #if defined(PETSC_HAVE_MUMPS)
5585 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5586 #endif
5587 #if defined(PETSC_HAVE_PASTIX)
5588 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5589 #endif
5590 #if defined(PETSC_HAVE_SUPERLU_DIST)
5591 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5592 #endif
5593 #if defined(PETSC_HAVE_CLIQUE)
5594 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5595 #endif
5596 EXTERN_C_END
5597 
5598 /*MC
5599    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5600 
5601    Options Database Keys:
5602 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5603 
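   Example usage (a sketch; the sizes m, n, M, N and the preallocation counts d_nz and o_nz
   are placeholders to be supplied by the application):
.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve
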
5604   Level: beginner
5605 
5606 .seealso: MatCreateAIJ()
5607 M*/
5608 
5609 EXTERN_C_BEGIN
5610 #undef __FUNCT__
5611 #define __FUNCT__ "MatCreate_MPIAIJ"
5612 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5613 {
5614   Mat_MPIAIJ     *b;
5615   PetscErrorCode ierr;
5616   PetscMPIInt    size;
5617 
5618   PetscFunctionBegin;
5619   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5620   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5621   B->data         = (void*)b;
5622   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5623   B->assembled    = PETSC_FALSE;
5624   B->insertmode   = NOT_SET_VALUES;
5625   b->size         = size;
5626   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5627 
5628   /* build cache for off array entries formed */
5629   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5630   b->donotstash  = PETSC_FALSE;
5631   b->colmap      = 0;
5632   b->garray      = 0;
5633   b->roworiented = PETSC_TRUE;
5634 
5635   /* stuff used for matrix vector multiply */
5636   b->lvec      = PETSC_NULL;
5637   b->Mvctx     = PETSC_NULL;
5638 
5639   /* stuff for MatGetRow() */
5640   b->rowindices   = 0;
5641   b->rowvalues    = 0;
5642   b->getrowactive = PETSC_FALSE;
5643 
5644   /* flexible pointer used in CUSP/CUSPARSE classes */
5645   b->spptr        = PETSC_NULL;
5646 
5647 #if defined(PETSC_HAVE_MUMPS)
5648   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5649                                      "MatGetFactor_aij_mumps",
5650                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5651 #endif
5652 #if defined(PETSC_HAVE_PASTIX)
5653   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5654                                            "MatGetFactor_mpiaij_pastix",
5655                                            MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5656 #endif
5657 #if defined(PETSC_HAVE_SUPERLU_DIST)
5658   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5659                                      "MatGetFactor_mpiaij_superlu_dist",
5660                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5661 #endif
5662 #if defined(PETSC_HAVE_CLIQUE)
5663   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C",
5664                                      "MatGetFactor_aij_clique",
5665                                      MatGetFactor_aij_clique);CHKERRQ(ierr);
5666 #endif
5667   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5668                                      "MatStoreValues_MPIAIJ",
5669                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5670   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5671                                      "MatRetrieveValues_MPIAIJ",
5672                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5673   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5674                                      "MatGetDiagonalBlock_MPIAIJ",
5675                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5676   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5677                                      "MatIsTranspose_MPIAIJ",
5678                                      MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5679   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5680                                      "MatMPIAIJSetPreallocation_MPIAIJ",
5681                                      MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5682   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5683                                      "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5684                                      MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5685   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5686                                      "MatDiagonalScaleLocal_MPIAIJ",
5687                                      MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5688   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5689                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5690                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5691   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5692                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5693                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5694   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5695                                      "MatConvert_MPIAIJ_MPISBAIJ",
5696                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5697   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5698                                      "MatMatMult_MPIDense_MPIAIJ",
5699                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5700   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5701                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5702                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5703   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5704                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5705                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5706   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5707   PetscFunctionReturn(0);
5708 }
5709 EXTERN_C_END
5710 
5711 #undef __FUNCT__
5712 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5713 /*@
5714      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5715          and "off-diagonal" parts of the matrix in CSR format.
5716 
5717    Collective on MPI_Comm
5718 
5719    Input Parameters:
5720 +  comm - MPI communicator
5721 .  m - number of local rows (Cannot be PETSC_DECIDE)
5722 .  n - number of local columns; this should be the same as the local size used in creating the
5723        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5724        calculated if N is given). For square matrices n is almost always m.
5725 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5726 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5727 .   i - row indices for "diagonal" portion of matrix
5728 .   j - column indices for "diagonal" portion of matrix
5729 .   a - matrix values for "diagonal" portion of matrix
5730 .   oi - row indices for "off-diagonal" portion of matrix
5731 .   oj - column indices for "off-diagonal" portion of matrix (in global numbering)
5732 -   oa - matrix values for "off-diagonal" portion of matrix
5733 
5734    Output Parameter:
5735 .   mat - the matrix
5736 
5737    Level: advanced
5738 
5739    Notes:
5740        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5741        must free the arrays once the matrix has been destroyed and not before.
5742 
5743        The i, j, oi, and oj indices are 0-based
5744 
5745        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5746 
5747        This sets local rows and cannot be used to set off-processor values.
5748 
5749        You cannot later use MatSetValues() to change values in this matrix.
5750 
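   Example usage (a minimal sketch for the 2x2 matrix [2 -1; -1 2] distributed over exactly two
   processes, one row per process; rank comes from MPI_Comm_rank() and the off-diagonal column
   index is given in global numbering):
.vb
     PetscInt    i[]  = {0,1}, j[]  = {0};   /* "diagonal" block: one entry in local column 0 */
     PetscScalar a[]  = {2.0};
     PetscInt    oi[] = {0,1}, oj[1];        /* "off-diagonal" block: one entry */
     PetscScalar oa[] = {-1.0};
     Mat         A;

     oj[0] = 1 - rank;                       /* global column of the off-process entry */
     ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
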
5751 .keywords: matrix, aij, compressed row, sparse, parallel
5752 
5753 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5754           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5755 @*/
5756 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5757                                                PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5758 {
5759   PetscErrorCode ierr;
5760   Mat_MPIAIJ     *maij;
5761 
5762   PetscFunctionBegin;
5763   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5764   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5765   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5766   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5767   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5768   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5769   maij = (Mat_MPIAIJ*) (*mat)->data;
5770   maij->donotstash     = PETSC_TRUE;
5771   (*mat)->preallocated = PETSC_TRUE;
5772 
5773   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5774   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5775 
5776   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5777   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5778 
5779   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5780   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5781   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5782   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5783 
5784   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5785   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5786   PetscFunctionReturn(0);
5787 }
5788 
5789 /*
5790     Special version for direct calls from Fortran
5791 */
5792 #include <petsc-private/fortranimpl.h>
5793 
5794 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5795 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5796 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5797 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5798 #endif
5799 
5800 /* Change these macros so they can be used in a void function */
5801 #undef CHKERRQ
5802 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5803 #undef SETERRQ2
5804 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5805 #undef SETERRQ3
5806 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5807 #undef SETERRQ
5808 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5809 
5810 EXTERN_C_BEGIN
5811 #undef __FUNCT__
5812 #define __FUNCT__ "matsetvaluesmpiaij_"
5813 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5814 {
5815   Mat             mat = *mmat;
5816   PetscInt        m = *mm, n = *mn;
5817   InsertMode      addv = *maddv;
5818   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5819   PetscScalar     value;
5820   PetscErrorCode  ierr;
5821 
5822   MatCheckPreallocated(mat,1);
5823   if (mat->insertmode == NOT_SET_VALUES) {
5824     mat->insertmode = addv;
5825   }
5826 #if defined(PETSC_USE_DEBUG)
5827   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5828 #endif
5829   {
5830   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5831   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5832   PetscBool       roworiented = aij->roworiented;
5833 
5834   /* Some Variables required in the macro */
5835   Mat             A = aij->A;
5836   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5837   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5838   MatScalar       *aa = a->a;
5839   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5840   Mat             B = aij->B;
5841   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5842   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5843   MatScalar       *ba = b->a;
5844 
5845   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5846   PetscInt        nonew = a->nonew;
5847   MatScalar       *ap1,*ap2;
5848 
5849   PetscFunctionBegin;
5850   for (i=0; i<m; i++) {
5851     if (im[i] < 0) continue;
5852 #if defined(PETSC_USE_DEBUG)
5853     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5854 #endif
5855     if (im[i] >= rstart && im[i] < rend) {
5856       row      = im[i] - rstart;
5857       lastcol1 = -1;
5858       rp1      = aj + ai[row];
5859       ap1      = aa + ai[row];
5860       rmax1    = aimax[row];
5861       nrow1    = ailen[row];
5862       low1     = 0;
5863       high1    = nrow1;
5864       lastcol2 = -1;
5865       rp2      = bj + bi[row];
5866       ap2      = ba + bi[row];
5867       rmax2    = bimax[row];
5868       nrow2    = bilen[row];
5869       low2     = 0;
5870       high2    = nrow2;
5871 
5872       for (j=0; j<n; j++) {
5873         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5874         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5875         if (in[j] >= cstart && in[j] < cend) {
5876           col = in[j] - cstart;
5877           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5878         } else if (in[j] < 0) continue;
5879 #if defined(PETSC_USE_DEBUG)
5880         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5881 #endif
5882         else {
5883           if (mat->was_assembled) {
5884             if (!aij->colmap) {
5885               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5886             }
5887 #if defined (PETSC_USE_CTABLE)
5888             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5889             col--;
5890 #else
5891             col = aij->colmap[in[j]] - 1;
5892 #endif
5893             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5894               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5895               col =  in[j];
5896               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5897               B = aij->B;
5898               b = (Mat_SeqAIJ*)B->data;
5899               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5900               rp2      = bj + bi[row];
5901               ap2      = ba + bi[row];
5902               rmax2    = bimax[row];
5903               nrow2    = bilen[row];
5904               low2     = 0;
5905               high2    = nrow2;
5906               bm       = aij->B->rmap->n;
5907               ba = b->a;
5908             }
5909           } else col = in[j];
5910           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5911         }
5912       }
5913     } else {
5914       if (!aij->donotstash) {
5915         if (roworiented) {
5916           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5917         } else {
5918           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5919         }
5920       }
5921     }
5922   }}
5923   PetscFunctionReturnVoid();
5924 }
5925 EXTERN_C_END
5926 
5927