xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 5edf65166e22df6feda2ed6bd8a6e348419dee17)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The type also automatically switches over to use inodes when
18    enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
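/*
   Example (sketch): a typical creation sequence for an AIJ matrix that works with both
   one and several processes by calling both preallocation routines, as recommended
   above.  The communicator comm, the global sizes M and N, and the preallocation
   counts d_nz and o_nz are illustrative assumptions, not values taken from this file.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,d_nz,PETSC_NULL);CHKERRQ(ierr);                  ignored unless the type is MATSEQAIJ
     ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);  ignored unless the type is MATMPIAIJ
*/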
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
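/*
   Example (sketch): selecting the AIJCRL format at run time; the executable name below
   is illustrative.

     ./ex1 -mat_type aijcrl

   or, in code, for a matrix A that has already been created and sized:

     ierr = MatSetType(A,MATAIJCRL);CHKERRQ(ierr);
*/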
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for (j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
107 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
108 {
109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
110   PetscErrorCode ierr;
111   PetscInt       i,rstart,nrows,*rows;
112 
113   PetscFunctionBegin;
114   *zrows = PETSC_NULL;
115   ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
116   ierr = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
117   for (i=0; i<nrows; i++) rows[i] += rstart;
118   ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 #undef __FUNCT__
123 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
124 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
125 {
126   PetscErrorCode ierr;
127   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
128   PetscInt       i,n,*garray = aij->garray;
129   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
130   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
131   PetscReal      *work;
132 
133   PetscFunctionBegin;
134   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
135   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
136   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
137   if (type == NORM_2) {
138     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
139       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
140     }
141     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
142       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
143     }
144   } else if (type == NORM_1) {
145     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
146       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
147     }
148     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
149       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
150     }
151   } else if (type == NORM_INFINITY) {
152     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
153       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
154     }
155     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
156       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
157     }
158 
159   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
160   if (type == NORM_INFINITY) {
161     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
162   } else {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
164   }
165   ierr = PetscFree(work);CHKERRQ(ierr);
166   if (type == NORM_2) {
167     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "MatDistribute_MPIAIJ"
174 /*
175     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
176     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
177 
178     Only for square matrices
179 */
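/*
   Example (sketch, illustrative): gmat is a MATSEQAIJ matrix held on rank 0 and m is the
   number of rows this process should own in the distributed copy.

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     ...  change the numerical values (but not the nonzero pattern) of gmat on rank 0 ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);   only the values are moved
*/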
180 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
181 {
182   PetscMPIInt    rank,size;
183   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
184   PetscErrorCode ierr;
185   Mat            mat;
186   Mat_SeqAIJ     *gmata;
187   PetscMPIInt    tag;
188   MPI_Status     status;
189   PetscBool      aij;
190   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
191 
192   PetscFunctionBegin;
193   CHKMEMQ;
194   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
195   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
196   if (!rank) {
197     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
198     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
199   }
200   if (reuse == MAT_INITIAL_MATRIX) {
201     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
202     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
203     if (!rank) {
204       bses[0] = gmat->rmap->bs;
205       bses[1] = gmat->cmap->bs;
206     }
207     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
208     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
209     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
210     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
211     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
212     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) {
215       rowners[i] += rowners[i-1];
216     }
217     rstart = rowners[rank];
218     rend   = rowners[rank+1];
219     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
220     if (!rank) {
221       gmata = (Mat_SeqAIJ*) gmat->data;
222       /* send row lengths to all processors */
223       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
224       for (i=1; i<size; i++) {
225 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
226       }
227       /* determine the number of diagonal and off-diagonal entries in each row */
228       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
229       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
230       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
231       jj = 0;
232       for (i=0; i<m; i++) {
233 	for (j=0; j<dlens[i]; j++) {
234           if (gmata->j[jj] < rstart) ld[i]++;
235 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
236 	  jj++;
237 	}
238       }
239       /* send column indices to other processes */
240       for (i=1; i<size; i++) {
241 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
242 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
243 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
244       }
245 
246       /* send numerical values to other processes */
247       for (i=1; i<size; i++) {
248         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
249         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
250       }
251       gmataa = gmata->a;
252       gmataj = gmata->j;
253 
254     } else {
255       /* receive row lengths */
256       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
257       /* receive column indices */
258       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
260       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       /* determine the number of diagonal and off-diagonal entries in each row */
262       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
263       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
264       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
265       jj = 0;
266       for (i=0; i<m; i++) {
267 	for (j=0; j<dlens[i]; j++) {
268           if (gmataj[jj] < rstart) ld[i]++;
269 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
270 	  jj++;
271 	}
272       }
273       /* receive numerical values */
274       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
275       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
276     }
277     /* set preallocation */
278     for (i=0; i<m; i++) {
279       dlens[i] -= olens[i];
280     }
281     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
282     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
283 
284     for (i=0; i<m; i++) {
285       dlens[i] += olens[i];
286     }
287     cnt  = 0;
288     for (i=0; i<m; i++) {
289       row  = rstart + i;
290       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
291       cnt += dlens[i];
292     }
293     if (rank) {
294       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
295     }
296     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
297     ierr = PetscFree(rowners);CHKERRQ(ierr);
298     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
299     *inmat = mat;
300   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
301     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
302     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
303     mat   = *inmat;
304     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
305     if (!rank) {
306       /* send numerical values to other processes */
307       gmata = (Mat_SeqAIJ*) gmat->data;
308       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
309       gmataa = gmata->a;
310       for (i=1; i<size; i++) {
311         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
312         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
313       }
314       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
315     } else {
316       /* receive numerical values from process 0*/
317       nz   = Ad->nz + Ao->nz;
318       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
319       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
320     }
321     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
322     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
323     ad = Ad->a;
324     ao = Ao->a;
325     if (mat->rmap->n) {
326       i  = 0;
327       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
328       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
329     }
330     for (i=1; i<mat->rmap->n; i++) {
331       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     i--;
335     if (mat->rmap->n) {
336       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
337     }
338     if (rank) {
339       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
340     }
341   }
342   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
344   CHKMEMQ;
345   PetscFunctionReturn(0);
346 }
347 
348 /*
349   Local utility routine that creates a mapping from the global column
350 number to the local number in the off-diagonal part of the local
351 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
352 a slightly higher hash table cost; without it, it is not scalable (each processor
353 has an order N integer array) but it is fast to access.
354 */
355 #undef __FUNCT__
356 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
357 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
358 {
359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
360   PetscErrorCode ierr;
361   PetscInt       n = aij->B->cmap->n,i;
362 
363   PetscFunctionBegin;
364   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
365 #if defined (PETSC_USE_CTABLE)
366   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
367   for (i=0; i<n; i++){
368     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
369   }
370 #else
371   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
372   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
373   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
374   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
375 #endif
376   PetscFunctionReturn(0);
377 }
378 
379 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
380 { \
381     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
382     lastcol1 = col;\
383     while (high1-low1 > 5) { \
384       t = (low1+high1)/2; \
385       if (rp1[t] > col) high1 = t; \
386       else             low1  = t; \
387     } \
388       for (_i=low1; _i<high1; _i++) { \
389         if (rp1[_i] > col) break; \
390         if (rp1[_i] == col) { \
391           if (addv == ADD_VALUES) ap1[_i] += value;   \
392           else                    ap1[_i] = value; \
393           goto a_noinsert; \
394         } \
395       }  \
396       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
397       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
398       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
399       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
400       N = nrow1++ - 1; a->nz++; high1++; \
401       /* shift up all the later entries in this row */ \
402       for (ii=N; ii>=_i; ii--) { \
403         rp1[ii+1] = rp1[ii]; \
404         ap1[ii+1] = ap1[ii]; \
405       } \
406       rp1[_i] = col;  \
407       ap1[_i] = value;  \
408       a_noinsert: ; \
409       ailen[row] = nrow1; \
410 }
411 
412 
413 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
414 { \
415     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
416     lastcol2 = col;\
417     while (high2-low2 > 5) { \
418       t = (low2+high2)/2; \
419       if (rp2[t] > col) high2 = t; \
420       else             low2  = t; \
421     } \
422     for (_i=low2; _i<high2; _i++) {		\
423       if (rp2[_i] > col) break;			\
424       if (rp2[_i] == col) {			      \
425 	if (addv == ADD_VALUES) ap2[_i] += value;     \
426 	else                    ap2[_i] = value;      \
427 	goto b_noinsert;			      \
428       }						      \
429     }							      \
430     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
431     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
432     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
433     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
434     N = nrow2++ - 1; b->nz++; high2++;					\
435     /* shift up all the later entries in this row */			\
436     for (ii=N; ii>=_i; ii--) {						\
437       rp2[ii+1] = rp2[ii];						\
438       ap2[ii+1] = ap2[ii];						\
439     }									\
440     rp2[_i] = col;							\
441     ap2[_i] = value;							\
442     b_noinsert: ;								\
443     bilen[row] = nrow2;							\
444 }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
449 {
450   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
451   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
452   PetscErrorCode ierr;
453   PetscInt       l,*garray = mat->garray,diag;
454 
455   PetscFunctionBegin;
456   /* code only works for square matrices A */
457 
458   /* find size of row to the left of the diagonal part */
459   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
460   row  = row - diag;
461   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
462     if (garray[b->j[b->i[row]+l]] > diag) break;
463   }
464   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
465 
466   /* diagonal part */
467   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
468 
469   /* right of diagonal part */
470   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatSetValues_MPIAIJ"
476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
477 {
478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
479   PetscScalar    value;
480   PetscErrorCode ierr;
481   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
482   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
483   PetscBool      roworiented = aij->roworiented;
484 
485   /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
486   Mat            A = aij->A;
487   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
488   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
489   MatScalar      *aa = a->a;
490   PetscBool      ignorezeroentries = a->ignorezeroentries;
491   Mat            B = aij->B;
492   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
493   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
494   MatScalar      *ba = b->a;
495 
496   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
497   PetscInt       nonew;
498   MatScalar      *ap1,*ap2;
499 
500   PetscFunctionBegin;
501   if (v) PetscValidScalarPointer(v,6);
502   for (i=0; i<m; i++) {
503     if (im[i] < 0) continue;
504 #if defined(PETSC_USE_DEBUG)
505     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
506 #endif
507     if (im[i] >= rstart && im[i] < rend) {
508       row      = im[i] - rstart;
509       lastcol1 = -1;
510       rp1      = aj + ai[row];
511       ap1      = aa + ai[row];
512       rmax1    = aimax[row];
513       nrow1    = ailen[row];
514       low1     = 0;
515       high1    = nrow1;
516       lastcol2 = -1;
517       rp2      = bj + bi[row];
518       ap2      = ba + bi[row];
519       rmax2    = bimax[row];
520       nrow2    = bilen[row];
521       low2     = 0;
522       high2    = nrow2;
523 
524       for (j=0; j<n; j++) {
525         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
526         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
527         if (in[j] >= cstart && in[j] < cend){
528           col = in[j] - cstart;
529           nonew = a->nonew;
530           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
531         } else if (in[j] < 0) continue;
532 #if defined(PETSC_USE_DEBUG)
533         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
534 #endif
535         else {
536           if (mat->was_assembled) {
537             if (!aij->colmap) {
538               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
539             }
540 #if defined (PETSC_USE_CTABLE)
541             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
542 	    col--;
543 #else
544             col = aij->colmap[in[j]] - 1;
545 #endif
546             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
547               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
548               col =  in[j];
549               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
550               B = aij->B;
551               b = (Mat_SeqAIJ*)B->data;
552               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
553               rp2      = bj + bi[row];
554               ap2      = ba + bi[row];
555               rmax2    = bimax[row];
556               nrow2    = bilen[row];
557               low2     = 0;
558               high2    = nrow2;
559               bm       = aij->B->rmap->n;
560               ba = b->a;
561             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
562           } else col = in[j];
563           nonew = b->nonew;
564           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
565         }
566       }
567     } else {
568       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
569       if (!aij->donotstash) {
570         mat->assembled = PETSC_FALSE;
571         if (roworiented) {
572           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
573         } else {
574           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
575         }
576       }
577     }
578   }
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatGetValues_MPIAIJ"
584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
585 {
586   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
587   PetscErrorCode ierr;
588   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
589   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
590 
591   PetscFunctionBegin;
592   for (i=0; i<m; i++) {
593     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
594     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
595     if (idxm[i] >= rstart && idxm[i] < rend) {
596       row = idxm[i] - rstart;
597       for (j=0; j<n; j++) {
598         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
599         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
600         if (idxn[j] >= cstart && idxn[j] < cend){
601           col = idxn[j] - cstart;
602           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
603         } else {
604           if (!aij->colmap) {
605             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
606           }
607 #if defined (PETSC_USE_CTABLE)
608           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
609           col --;
610 #else
611           col = aij->colmap[idxn[j]] - 1;
612 #endif
613           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
614           else {
615             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
616           }
617         }
618       }
619     } else {
620       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
621     }
622   }
623   PetscFunctionReturn(0);
624 }
625 
626 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
627 
628 #undef __FUNCT__
629 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
630 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
631 {
632   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
633   PetscErrorCode ierr;
634   PetscInt       nstash,reallocs;
635   InsertMode     addv;
636 
637   PetscFunctionBegin;
638   if (aij->donotstash || mat->nooffprocentries) {
639     PetscFunctionReturn(0);
640   }
641 
642   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
643   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
644   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
645   mat->insertmode = addv; /* in case this processor had no cache */
646 
647   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
648   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
649   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
650   PetscFunctionReturn(0);
651 }
652 
653 #undef __FUNCT__
654 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
655 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
656 {
657   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
658   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
659   PetscErrorCode ierr;
660   PetscMPIInt    n;
661   PetscInt       i,j,rstart,ncols,flg;
662   PetscInt       *row,*col;
663   PetscBool      other_disassembled;
664   PetscScalar    *val;
665   InsertMode     addv = mat->insertmode;
666 
667   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
668   PetscFunctionBegin;
669   if (!aij->donotstash && !mat->nooffprocentries) {
670     while (1) {
671       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
672       if (!flg) break;
673 
674       for (i=0; i<n;) {
675         /* Now identify the consecutive vals belonging to the same row */
676         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
677         if (j < n) ncols = j-i;
678         else       ncols = n-i;
679         /* Now assemble all these values with a single function call */
680         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
681         i = j;
682       }
683     }
684     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
685   }
686   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
687   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
688 
689   /* determine if any processor has disassembled; if so we must
690      also disassemble ourselves, in order that we may reassemble. */
691   /*
692      if the nonzero structure of submatrix B cannot change then we know that
693      no processor disassembled, and thus we can skip this step
694   */
695   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
696     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
697     if (mat->was_assembled && !other_disassembled) {
698       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
699     }
700   }
701   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
702     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
703   }
704   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
705   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
706   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
707   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
708 
709   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
710   aij->rowvalues = 0;
711 
712   /* used by MatAXPY() */
713   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
714   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
715 
716   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
717   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
718   PetscFunctionReturn(0);
719 }
720 
721 #undef __FUNCT__
722 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
723 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
724 {
725   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
726   PetscErrorCode ierr;
727 
728   PetscFunctionBegin;
729   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
730   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
731   PetscFunctionReturn(0);
732 }
733 
734 #undef __FUNCT__
735 #define __FUNCT__ "MatZeroRows_MPIAIJ"
736 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
737 {
738   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
739   PetscErrorCode    ierr;
740   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
741   PetscInt          i,*owners = A->rmap->range;
742   PetscInt          *nprocs,j,idx,nsends,row;
743   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
744   PetscInt          *rvalues,count,base,slen,*source;
745   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
746   MPI_Comm          comm = ((PetscObject)A)->comm;
747   MPI_Request       *send_waits,*recv_waits;
748   MPI_Status        recv_status,*send_status;
749   const PetscScalar *xx;
750   PetscScalar       *bb;
751 #if defined(PETSC_DEBUG)
752   PetscBool      found = PETSC_FALSE;
753 #endif
754 
755   PetscFunctionBegin;
756   /*  first count number of contributors to each processor */
757   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
758   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
759   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
760   j = 0;
761   for (i=0; i<N; i++) {
762     if (lastidx > (idx = rows[i])) j = 0;
763     lastidx = idx;
764     for (; j<size; j++) {
765       if (idx >= owners[j] && idx < owners[j+1]) {
766         nprocs[2*j]++;
767         nprocs[2*j+1] = 1;
768         owner[i] = j;
769 #if defined(PETSC_DEBUG)
770         found = PETSC_TRUE;
771 #endif
772         break;
773       }
774     }
775 #if defined(PETSC_DEBUG)
776     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
777     found = PETSC_FALSE;
778 #endif
779   }
780   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
781 
782   if (A->nooffproczerorows) {
783     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
784     nrecvs = nsends;
785     nmax   = N;
786   } else {
787     /* inform other processors of number of messages and max length*/
788     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
789   }
790 
791   /* post receives:   */
792   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
793   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
794   for (i=0; i<nrecvs; i++) {
795     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
796   }
797 
798   /* do sends:
799       1) starts[i] gives the starting index in svalues for stuff going to
800          the ith processor
801   */
802   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
803   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
804   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
805   starts[0] = 0;
806   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
807   for (i=0; i<N; i++) {
808     svalues[starts[owner[i]]++] = rows[i];
809   }
810 
811   starts[0] = 0;
812   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
813   count = 0;
814   for (i=0; i<size; i++) {
815     if (nprocs[2*i+1]) {
816       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
817     }
818   }
819   ierr = PetscFree(starts);CHKERRQ(ierr);
820 
821   base = owners[rank];
822 
823   /*  wait on receives */
824   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
825   count  = nrecvs; slen = 0;
826   while (count) {
827     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
828     /* unpack receives into our local space */
829     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
830     source[imdex]  = recv_status.MPI_SOURCE;
831     lens[imdex]    = n;
832     slen          += n;
833     count--;
834   }
835   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
836 
837   /* move the data into the send scatter */
838   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
839   count = 0;
840   for (i=0; i<nrecvs; i++) {
841     values = rvalues + i*nmax;
842     for (j=0; j<lens[i]; j++) {
843       lrows[count++] = values[j] - base;
844     }
845   }
846   ierr = PetscFree(rvalues);CHKERRQ(ierr);
847   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
848   ierr = PetscFree(owner);CHKERRQ(ierr);
849   ierr = PetscFree(nprocs);CHKERRQ(ierr);
850 
851   /* fix right hand side if needed */
852   if (x && b) {
853     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
854     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
855     for (i=0; i<slen; i++) {
856       bb[lrows[i]] = diag*xx[lrows[i]];
857     }
858     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
859     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
860   }
861   /*
862         Zero the required rows. If the "diagonal block" of the matrix
863      is square and the user wishes to set the diagonal we use separate
864      code so that MatSetValues() is not called for each diagonal entry, which
865      would allocate new memory with many mallocs and slow things down.
866 
867   */
868   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
869   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
870   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
871     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
872   } else if (diag != 0.0) {
873     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
874     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
875       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
876 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
877     }
878     for (i = 0; i < slen; i++) {
879       row  = lrows[i] + rstart;
880       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
881     }
882     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
883     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
884   } else {
885     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
886   }
887   ierr = PetscFree(lrows);CHKERRQ(ierr);
888 
889   /* wait on sends */
890   if (nsends) {
891     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
892     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
893     ierr = PetscFree(send_status);CHKERRQ(ierr);
894   }
895   ierr = PetscFree(send_waits);CHKERRQ(ierr);
896   ierr = PetscFree(svalues);CHKERRQ(ierr);
897   PetscFunctionReturn(0);
898 }
899 
900 #undef __FUNCT__
901 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
902 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
903 {
904   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
905   PetscErrorCode    ierr;
906   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
907   PetscInt          i,*owners = A->rmap->range;
908   PetscInt          *nprocs,j,idx,nsends;
909   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
910   PetscInt          *rvalues,count,base,slen,*source;
911   PetscInt          *lens,*lrows,*values,m;
912   MPI_Comm          comm = ((PetscObject)A)->comm;
913   MPI_Request       *send_waits,*recv_waits;
914   MPI_Status        recv_status,*send_status;
915   const PetscScalar *xx;
916   PetscScalar       *bb,*mask;
917   Vec               xmask,lmask;
918   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
919   const PetscInt    *aj, *ii,*ridx;
920   PetscScalar       *aa;
921 #if defined(PETSC_DEBUG)
922   PetscBool         found = PETSC_FALSE;
923 #endif
924 
925   PetscFunctionBegin;
926   /*  first count number of contributors to each processor */
927   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
928   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
929   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
930   j = 0;
931   for (i=0; i<N; i++) {
932     if (lastidx > (idx = rows[i])) j = 0;
933     lastidx = idx;
934     for (; j<size; j++) {
935       if (idx >= owners[j] && idx < owners[j+1]) {
936         nprocs[2*j]++;
937         nprocs[2*j+1] = 1;
938         owner[i] = j;
939 #if defined(PETSC_DEBUG)
940         found = PETSC_TRUE;
941 #endif
942         break;
943       }
944     }
945 #if defined(PETSC_DEBUG)
946     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
947     found = PETSC_FALSE;
948 #endif
949   }
950   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
951 
952   /* inform other processors of number of messages and max length*/
953   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
954 
955   /* post receives:   */
956   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
957   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
958   for (i=0; i<nrecvs; i++) {
959     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
960   }
961 
962   /* do sends:
963       1) starts[i] gives the starting index in svalues for stuff going to
964          the ith processor
965   */
966   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
967   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
968   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
969   starts[0] = 0;
970   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
971   for (i=0; i<N; i++) {
972     svalues[starts[owner[i]]++] = rows[i];
973   }
974 
975   starts[0] = 0;
976   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
977   count = 0;
978   for (i=0; i<size; i++) {
979     if (nprocs[2*i+1]) {
980       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
981     }
982   }
983   ierr = PetscFree(starts);CHKERRQ(ierr);
984 
985   base = owners[rank];
986 
987   /*  wait on receives */
988   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
989   count  = nrecvs; slen = 0;
990   while (count) {
991     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
992     /* unpack receives into our local space */
993     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
994     source[imdex]  = recv_status.MPI_SOURCE;
995     lens[imdex]    = n;
996     slen          += n;
997     count--;
998   }
999   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1000 
1001   /* move the data into the send scatter */
1002   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1003   count = 0;
1004   for (i=0; i<nrecvs; i++) {
1005     values = rvalues + i*nmax;
1006     for (j=0; j<lens[i]; j++) {
1007       lrows[count++] = values[j] - base;
1008     }
1009   }
1010   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1011   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1012   ierr = PetscFree(owner);CHKERRQ(ierr);
1013   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1014   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1015 
1016   /* zero diagonal part of matrix */
1017   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1018 
1019   /* handle off diagonal part of matrix */
1020   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
1021   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1022   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1023   for (i=0; i<slen; i++) {
1024     bb[lrows[i]] = 1;
1025   }
1026   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1027   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1028   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1030   if (x) {
1031     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1032     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1034     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1035   }
1036   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1037 
1038   /* remove zeroed rows of off diagonal matrix */
1039   ii = aij->i;
1040   for (i=0; i<slen; i++) {
1041     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1042   }
1043 
1044   /* loop over all elements of off process part of matrix zeroing removed columns*/
1045   if (aij->compressedrow.use){
1046     m    = aij->compressedrow.nrows;
1047     ii   = aij->compressedrow.i;
1048     ridx = aij->compressedrow.rindex;
1049     for (i=0; i<m; i++){
1050       n   = ii[i+1] - ii[i];
1051       aj  = aij->j + ii[i];
1052       aa  = aij->a + ii[i];
1053 
1054       for (j=0; j<n; j++) {
1055         if (PetscAbsScalar(mask[*aj])) {
1056           if (b) bb[*ridx] -= *aa*xx[*aj];
1057           *aa        = 0.0;
1058         }
1059         aa++;
1060         aj++;
1061       }
1062       ridx++;
1063     }
1064   } else { /* do not use compressed row format */
1065     m = l->B->rmap->n;
1066     for (i=0; i<m; i++) {
1067       n   = ii[i+1] - ii[i];
1068       aj  = aij->j + ii[i];
1069       aa  = aij->a + ii[i];
1070       for (j=0; j<n; j++) {
1071         if (PetscAbsScalar(mask[*aj])) {
1072           if (b) bb[i] -= *aa*xx[*aj];
1073           *aa    = 0.0;
1074         }
1075         aa++;
1076         aj++;
1077       }
1078     }
1079   }
1080   if (x) {
1081     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1082     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1083   }
1084   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1085   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1086   ierr = PetscFree(lrows);CHKERRQ(ierr);
1087 
1088   /* wait on sends */
1089   if (nsends) {
1090     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1091     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1092     ierr = PetscFree(send_status);CHKERRQ(ierr);
1093   }
1094   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1095   ierr = PetscFree(svalues);CHKERRQ(ierr);
1096 
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMult_MPIAIJ"
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1110   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151   PetscBool      merged;
1152 
1153   PetscFunctionBegin;
1154   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1155   /* do nondiagonal part */
1156   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1157   if (!merged) {
1158     /* send it on its way */
1159     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160     /* do local part */
1161     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1162     /* receive remote parts: note this assumes the values are not actually */
1163     /* added into yy until the next line */
1164     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165   } else {
1166     /* do local part */
1167     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1168     /* send it on its way */
1169     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1170     /* values actually were received in the Begin() but we need to call this nop */
1171     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 EXTERN_C_BEGIN
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1179 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1180 {
1181   MPI_Comm       comm;
1182   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1183   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1184   IS             Me,Notme;
1185   PetscErrorCode ierr;
1186   PetscInt       M,N,first,last,*notme,i;
1187   PetscMPIInt    size;
1188 
1189   PetscFunctionBegin;
1190 
1191   /* Easy test: the diagonal blocks must be transposes of each other */
1192   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1193   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1194   if (!*f) PetscFunctionReturn(0);
1195   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1196   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1197   if (size == 1) PetscFunctionReturn(0);
1198 
1199   /* Hard test: off-diagonal block. This takes a MatGetSubMatrices() call. */
1200   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1201   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1202   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1203   for (i=0; i<first; i++) notme[i] = i;
1204   for (i=last; i<M; i++) notme[i-last+first] = i;
1205   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1206   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1207   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1208   Aoff = Aoffs[0];
1209   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1210   Boff = Boffs[0];
1211   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1212   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1213   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1214   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1215   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1216   ierr = PetscFree(notme);CHKERRQ(ierr);
1217    PetscFunctionReturn(0);
1218 }
1219 EXTERN_C_END
1220 
1221 #undef __FUNCT__
1222 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1223 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1224 {
1225   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1226   PetscErrorCode ierr;
1227 
1228   PetscFunctionBegin;
1229   /* do nondiagonal part */
1230   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1231   /* send it on its way */
1232   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1233   /* do local part */
1234   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1235   /* receive remote parts */
1236   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 /*
1241   This only works correctly for square matrices where the subblock A->A is the
1242    diagonal block
1243 */
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1246 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1247 {
1248   PetscErrorCode ierr;
1249   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1250 
1251   PetscFunctionBegin;
1252   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1253   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1254   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatScale_MPIAIJ"
1260 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1261 {
1262   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1263   PetscErrorCode ierr;
1264 
1265   PetscFunctionBegin;
1266   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1267   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #undef __FUNCT__
1272 #define __FUNCT__ "MatDestroy_MPIAIJ"
1273 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1274 {
1275   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1276   PetscErrorCode ierr;
1277 
1278   PetscFunctionBegin;
1279 #if defined(PETSC_USE_LOG)
1280   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1281 #endif
1282   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1283   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1284   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1285   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1286 #if defined (PETSC_USE_CTABLE)
1287   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1288 #else
1289   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1290 #endif
1291   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1292   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1293   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1294   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1295   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1296   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1297 
1298   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1312 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1313 {
1314   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1315   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1316   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1317   PetscErrorCode    ierr;
1318   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1319   int               fd;
1320   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1321   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1322   PetscScalar       *column_values;
1323   PetscInt          message_count,flowcontrolcount;
1324   FILE              *file;
1325 
1326   PetscFunctionBegin;
1327   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1328   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1329   nz   = A->nz + B->nz;
1330   if (!rank) {
1331     header[0] = MAT_FILE_CLASSID;
1332     header[1] = mat->rmap->N;
1333     header[2] = mat->cmap->N;
1334     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1335     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1336     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1337     /* get largest number of rows any processor has */
1338     rlen = mat->rmap->n;
1339     range = mat->rmap->range;
1340     for (i=1; i<size; i++) {
1341       rlen = PetscMax(rlen,range[i+1] - range[i]);
1342     }
1343   } else {
1344     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1345     rlen = mat->rmap->n;
1346   }
1347 
1348   /* load up the local row counts */
1349   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1350   for (i=0; i<mat->rmap->n; i++) {
1351     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1352   }
1353 
1354   /* store the row lengths to the file */
1355   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1356   if (!rank) {
1357     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1358     for (i=1; i<size; i++) {
1359       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1360       rlen = range[i+1] - range[i];
1361       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1362       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1363     }
1364     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1365   } else {
1366     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1367     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1368     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1369   }
1370   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1371 
1372   /* load up the local column indices */
1373   nzmax = nz; /* 0th processor needs space as large as the largest processor needs */
1374   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1375   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1376   cnt  = 0;
1377   for (i=0; i<mat->rmap->n; i++) {
1378     for (j=B->i[i]; j<B->i[i+1]; j++) {
1379       if ( (col = garray[B->j[j]]) > cstart) break;
1380       column_indices[cnt++] = col;
1381     }
1382     for (k=A->i[i]; k<A->i[i+1]; k++) {
1383       column_indices[cnt++] = A->j[k] + cstart;
1384     }
1385     for (; j<B->i[i+1]; j++) {
1386       column_indices[cnt++] = garray[B->j[j]];
1387     }
1388   }
1389   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1390 
1391   /* store the column indices to the file */
1392   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1393   if (!rank) {
1394     MPI_Status status;
1395     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1396     for (i=1; i<size; i++) {
1397       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1398       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1399       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1400       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1401       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1402     }
1403     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1404   } else {
1405     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1406     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1407     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1408     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1409   }
1410   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1411 
1412   /* load up the local column values */
1413   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1414   cnt  = 0;
1415   for (i=0; i<mat->rmap->n; i++) {
1416     for (j=B->i[i]; j<B->i[i+1]; j++) {
1417       if ( garray[B->j[j]] > cstart) break;
1418       column_values[cnt++] = B->a[j];
1419     }
1420     for (k=A->i[i]; k<A->i[i+1]; k++) {
1421       column_values[cnt++] = A->a[k];
1422     }
1423     for (; j<B->i[i+1]; j++) {
1424       column_values[cnt++] = B->a[j];
1425     }
1426   }
1427   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1428 
1429   /* store the column values to the file */
1430   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1431   if (!rank) {
1432     MPI_Status status;
1433     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1434     for (i=1; i<size; i++) {
1435       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1436       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1437       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1438       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1439       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1440     }
1441     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1442   } else {
1443     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1444     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1445     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1446     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1447   }
1448   ierr = PetscFree(column_values);CHKERRQ(ierr);
1449 
1450   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1451   if (file) {
1452     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1453   }
1454 
1455   PetscFunctionReturn(0);
1456 }
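
/*
   Caller-side sketch (illustrative; A is assumed to be an assembled MATMPIAIJ matrix and
   the file name "A.dat" is hypothetical): the binary path above is normally reached by
   calling MatView() with a binary viewer, e.g.

     PetscViewer v;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&v);CHKERRQ(ierr);
     ierr = MatView(A,v);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&v);CHKERRQ(ierr);
*/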
1457 
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1460 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1461 {
1462   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1463   PetscErrorCode    ierr;
1464   PetscMPIInt       rank = aij->rank,size = aij->size;
1465   PetscBool         isdraw,iascii,isbinary;
1466   PetscViewer       sviewer;
1467   PetscViewerFormat format;
1468 
1469   PetscFunctionBegin;
1470   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1471   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1472   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1473   if (iascii) {
1474     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1475     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1476       MatInfo    info;
1477       PetscBool  inodes;
1478 
1479       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1480       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1481       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1482       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1483       if (!inodes) {
1484         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1485 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1486       } else {
1487         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1488 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1489       }
1490       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1491       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1492       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1493       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1494       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1495       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1496       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1497       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1498       PetscFunctionReturn(0);
1499     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1500       PetscInt   inodecount,inodelimit,*inodes;
1501       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1502       if (inodes) {
1503         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1504       } else {
1505         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1506       }
1507       PetscFunctionReturn(0);
1508     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1509       PetscFunctionReturn(0);
1510     }
1511   } else if (isbinary) {
1512     if (size == 1) {
1513       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1514       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1515     } else {
1516       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1517     }
1518     PetscFunctionReturn(0);
1519   } else if (isdraw) {
1520     PetscDraw  draw;
1521     PetscBool  isnull;
1522     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1523     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1524   }
1525 
1526   if (size == 1) {
1527     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1528     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1529   } else {
1530     /* assemble the entire matrix onto first processor. */
1531     Mat         A;
1532     Mat_SeqAIJ  *Aloc;
1533     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1534     MatScalar   *a;
1535 
1536     if (mat->rmap->N > 1024) {
1537       PetscBool  flg = PETSC_FALSE;
1538 
1539       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1540       if (!flg) {
1541         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1542       }
1543     }
1544 
1545     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1546     if (!rank) {
1547       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1548     } else {
1549       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1550     }
1551     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1552     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1553     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1554     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1555     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1556 
1557     /* copy over the A part */
1558     Aloc = (Mat_SeqAIJ*)aij->A->data;
1559     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1560     row = mat->rmap->rstart;
1561     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1562     for (i=0; i<m; i++) {
1563       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1564       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1565     }
1566     aj = Aloc->j;
1567     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1568 
1569     /* copy over the B part */
1570     Aloc = (Mat_SeqAIJ*)aij->B->data;
1571     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1572     row  = mat->rmap->rstart;
1573     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1574     ct   = cols;
1575     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1576     for (i=0; i<m; i++) {
1577       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1578       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1579     }
1580     ierr = PetscFree(ct);CHKERRQ(ierr);
1581     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1582     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1583     /*
1584        Everyone has to participate in the call below in order to draw the matrix, since the
1585        graphics waits are synchronized across all processes that share the PetscDraw object
1586     */
1587     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1588     if (!rank) {
1589       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1590       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1591       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1592       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1593     }
1594     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1595     ierr = MatDestroy(&A);CHKERRQ(ierr);
1596   }
1597   PetscFunctionReturn(0);
1598 }
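
/*
   Caller-side sketch (illustrative; A is assumed to be an assembled MATMPIAIJ matrix): the
   ASCII info branches above are selected by the viewer format, e.g.

     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/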
1599 
1600 #undef __FUNCT__
1601 #define __FUNCT__ "MatView_MPIAIJ"
1602 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1603 {
1604   PetscErrorCode ierr;
1605   PetscBool      iascii,isdraw,issocket,isbinary;
1606 
1607   PetscFunctionBegin;
1608   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1609   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1610   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1611   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1612   if (iascii || isdraw || isbinary || issocket) {
1613     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1614   } else {
1615     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1616   }
1617   PetscFunctionReturn(0);
1618 }
1619 
1620 #undef __FUNCT__
1621 #define __FUNCT__ "MatSOR_MPIAIJ"
1622 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1623 {
1624   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1625   PetscErrorCode ierr;
1626   Vec            bb1 = 0;
1627   PetscBool      hasop;
1628 
1629   PetscFunctionBegin;
1630   if (flag == SOR_APPLY_UPPER) {
1631     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1632     PetscFunctionReturn(0);
1633   }
1634 
1635   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1636     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1637   }
1638 
1639   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1640     if (flag & SOR_ZERO_INITIAL_GUESS) {
1641       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1642       its--;
1643     }
1644 
1645     while (its--) {
1646       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1647       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1648 
1649       /* update rhs: bb1 = bb - B*x */
1650       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1651       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1652 
1653       /* local sweep */
1654       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1655     }
1656   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1657     if (flag & SOR_ZERO_INITIAL_GUESS) {
1658       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1659       its--;
1660     }
1661     while (its--) {
1662       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1663       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1664 
1665       /* update rhs: bb1 = bb - B*x */
1666       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1667       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1668 
1669       /* local sweep */
1670       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1671     }
1672   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1673     if (flag & SOR_ZERO_INITIAL_GUESS) {
1674       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1675       its--;
1676     }
1677     while (its--) {
1678       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1679       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1680 
1681       /* update rhs: bb1 = bb - B*x */
1682       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1683       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1684 
1685       /* local sweep */
1686       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1687     }
1688   }  else if (flag & SOR_EISENSTAT) {
1689     Vec         xx1;
1690 
1691     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1692     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1693 
1694     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1695     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1696     if (!mat->diag) {
1697       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1698       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1699     }
1700     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1701     if (hasop) {
1702       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1703     } else {
1704       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1705     }
1706     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1707 
1708     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1709 
1710     /* local sweep */
1711     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1712     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1713     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1714   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1715 
1716   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1717   PetscFunctionReturn(0);
1718 }
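
/*
   Note on the local sweeps above (illustrative): with the splitting mat = A (diagonal block)
   plus B (off-diagonal block), each outer iteration forms bb1 = bb - B*x from the scattered
   ghost values in lvec and then runs the sequential SOR kernel on the diagonal block only,
   which is roughly equivalent to

     ierr = MatSOR(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);

   A true parallel SOR across process boundaries is not implemented (see the final SETERRQ).
*/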
1719 
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatPermute_MPIAIJ"
1722 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1723 {
1724   Mat            aA,aB,Aperm;
1725   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1726   PetscScalar    *aa,*ba;
1727   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1728   PetscSF        rowsf,sf;
1729   IS             parcolp = PETSC_NULL;
1730   PetscBool      done;
1731   PetscErrorCode ierr;
1732 
1733   PetscFunctionBegin;
1734   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1735   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1736   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1737   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1738 
1739   /* Invert row permutation to find out where my rows should go */
1740   ierr = PetscSFCreate(((PetscObject)A)->comm,&rowsf);CHKERRQ(ierr);
1741   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,PETSC_NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1742   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1743   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1744   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1745 
1746   /* Invert column permutation to find out where my columns should go */
1747   ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
1748   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,PETSC_NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1749   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1750   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1751   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1752   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1753 
1754   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1755   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1756   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1757 
1758   /* Find out where my gcols should go */
1759   ierr = MatGetSize(aB,PETSC_NULL,&ng);CHKERRQ(ierr);
1760   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1761   ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
1762   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,PETSC_NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1763   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1764   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1765   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1766 
1767   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1768   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1769   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1770   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1771   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1772   for (i=0; i<m; i++) {
1773     PetscInt row = rdest[i],rowner;
1774     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1775     for (j=ai[i]; j<ai[i+1]; j++) {
1776       PetscInt cowner,col = cdest[aj[j]];
1777       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1778       if (rowner == cowner) dnnz[i]++;
1779       else onnz[i]++;
1780     }
1781     for (j=bi[i]; j<bi[i+1]; j++) {
1782       PetscInt cowner,col = gcdest[bj[j]];
1783       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1784       if (rowner == cowner) dnnz[i]++;
1785       else onnz[i]++;
1786     }
1787   }
1788   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1789   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1790   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1791   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1792   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1793   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1794   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1795 
1796   ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1797   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1798   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1799   for (i=0; i<m; i++) {
1800     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1801     PetscInt rowlen;
1802     rowlen = ai[i+1] - ai[i];
1803     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1804     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1805     rowlen = bi[i+1] - bi[i];
1806     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1807     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1808   }
1809   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1810   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1811   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1812   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1813   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1814   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1815   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1816   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1817   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1818   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1819   *B = Aperm;
1820   PetscFunctionReturn(0);
1821 }
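
/*
   Usage sketch (illustrative; rowp and colp are assumed to be index sets describing the
   desired row and column permutations):

     Mat Aperm;
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
     ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
*/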
1822 
1823 #undef __FUNCT__
1824 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1825 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1826 {
1827   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1828   Mat            A = mat->A,B = mat->B;
1829   PetscErrorCode ierr;
1830   PetscReal      isend[5],irecv[5];
1831 
1832   PetscFunctionBegin;
1833   info->block_size     = 1.0;
1834   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1835   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1836   isend[3] = info->memory;  isend[4] = info->mallocs;
1837   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1838   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1839   isend[3] += info->memory;  isend[4] += info->mallocs;
1840   if (flag == MAT_LOCAL) {
1841     info->nz_used      = isend[0];
1842     info->nz_allocated = isend[1];
1843     info->nz_unneeded  = isend[2];
1844     info->memory       = isend[3];
1845     info->mallocs      = isend[4];
1846   } else if (flag == MAT_GLOBAL_MAX) {
1847     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1848     info->nz_used      = irecv[0];
1849     info->nz_allocated = irecv[1];
1850     info->nz_unneeded  = irecv[2];
1851     info->memory       = irecv[3];
1852     info->mallocs      = irecv[4];
1853   } else if (flag == MAT_GLOBAL_SUM) {
1854     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1855     info->nz_used      = irecv[0];
1856     info->nz_allocated = irecv[1];
1857     info->nz_unneeded  = irecv[2];
1858     info->memory       = irecv[3];
1859     info->mallocs      = irecv[4];
1860   }
1861   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1862   info->fill_ratio_needed = 0;
1863   info->factor_mallocs    = 0;
1864   PetscFunctionReturn(0);
1865 }
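
/*
   Usage sketch (illustrative; A is an assembled MATMPIAIJ matrix): global statistics are
   obtained by reducing the per-process numbers computed above, e.g.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g\n",info.nz_used);CHKERRQ(ierr);
*/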
1866 
1867 #undef __FUNCT__
1868 #define __FUNCT__ "MatSetOption_MPIAIJ"
1869 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1870 {
1871   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1872   PetscErrorCode ierr;
1873 
1874   PetscFunctionBegin;
1875   switch (op) {
1876   case MAT_NEW_NONZERO_LOCATIONS:
1877   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1878   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1879   case MAT_KEEP_NONZERO_PATTERN:
1880   case MAT_NEW_NONZERO_LOCATION_ERR:
1881   case MAT_USE_INODES:
1882   case MAT_IGNORE_ZERO_ENTRIES:
1883     MatCheckPreallocated(A,1);
1884     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1885     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1886     break;
1887   case MAT_ROW_ORIENTED:
1888     a->roworiented = flg;
1889     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1890     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1891     break;
1892   case MAT_NEW_DIAGONALS:
1893     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1894     break;
1895   case MAT_IGNORE_OFF_PROC_ENTRIES:
1896     a->donotstash = flg;
1897     break;
1898   case MAT_SPD:
1899     A->spd_set                         = PETSC_TRUE;
1900     A->spd                             = flg;
1901     if (flg) {
1902       A->symmetric                     = PETSC_TRUE;
1903       A->structurally_symmetric        = PETSC_TRUE;
1904       A->symmetric_set                 = PETSC_TRUE;
1905       A->structurally_symmetric_set    = PETSC_TRUE;
1906     }
1907     break;
1908   case MAT_SYMMETRIC:
1909     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1910     break;
1911   case MAT_STRUCTURALLY_SYMMETRIC:
1912     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1913     break;
1914   case MAT_HERMITIAN:
1915     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1916     break;
1917   case MAT_SYMMETRY_ETERNAL:
1918     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1919     break;
1920   default:
1921     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1922   }
1923   PetscFunctionReturn(0);
1924 }
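
/*
   Usage sketch (illustrative; A is an assembled MATMPIAIJ matrix): options set on the parallel
   matrix are forwarded to the diagonal (A) and off-diagonal (B) blocks as appropriate, e.g.

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
*/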
1925 
1926 #undef __FUNCT__
1927 #define __FUNCT__ "MatGetRow_MPIAIJ"
1928 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1929 {
1930   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1931   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1932   PetscErrorCode ierr;
1933   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1934   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1935   PetscInt       *cmap,*idx_p;
1936 
1937   PetscFunctionBegin;
1938   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1939   mat->getrowactive = PETSC_TRUE;
1940 
1941   if (!mat->rowvalues && (idx || v)) {
1942     /*
1943         allocate enough space to hold information from the longest row.
1944     */
1945     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1946     PetscInt   max = 1,tmp;
1947     for (i=0; i<matin->rmap->n; i++) {
1948       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1949       if (max < tmp) { max = tmp; }
1950     }
1951     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1952   }
1953 
1954   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1955   lrow = row - rstart;
1956 
1957   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1958   if (!v)   {pvA = 0; pvB = 0;}
1959   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1960   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1961   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1962   nztot = nzA + nzB;
1963 
1964   cmap  = mat->garray;
1965   if (v  || idx) {
1966     if (nztot) {
1967       /* Sort by increasing column numbers, assuming A and B already sorted */
1968       PetscInt imark = -1;
1969       if (v) {
1970         *v = v_p = mat->rowvalues;
1971         for (i=0; i<nzB; i++) {
1972           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1973           else break;
1974         }
1975         imark = i;
1976         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1977         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1978       }
1979       if (idx) {
1980         *idx = idx_p = mat->rowindices;
1981         if (imark > -1) {
1982           for (i=0; i<imark; i++) {
1983             idx_p[i] = cmap[cworkB[i]];
1984           }
1985         } else {
1986           for (i=0; i<nzB; i++) {
1987             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1988             else break;
1989           }
1990           imark = i;
1991         }
1992         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1993         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1994       }
1995     } else {
1996       if (idx) *idx = 0;
1997       if (v)   *v   = 0;
1998     }
1999   }
2000   *nz = nztot;
2001   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
2002   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
2003   PetscFunctionReturn(0);
2004 }
2005 
2006 #undef __FUNCT__
2007 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
2008 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2009 {
2010   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2011 
2012   PetscFunctionBegin;
2013   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2014   aij->getrowactive = PETSC_FALSE;
2015   PetscFunctionReturn(0);
2016 }
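
/*
   Usage sketch (illustrative; A is an assembled MATMPIAIJ matrix): MatGetRow()/MatRestoreRow()
   may only be called for locally owned rows, and every MatGetRow() must be paired with a
   MatRestoreRow():

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use cols[] and vals[] here ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/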
2017 
2018 #undef __FUNCT__
2019 #define __FUNCT__ "MatNorm_MPIAIJ"
2020 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2021 {
2022   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2023   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2024   PetscErrorCode ierr;
2025   PetscInt       i,j,cstart = mat->cmap->rstart;
2026   PetscReal      sum = 0.0;
2027   MatScalar      *v;
2028 
2029   PetscFunctionBegin;
2030   if (aij->size == 1) {
2031     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2032   } else {
2033     if (type == NORM_FROBENIUS) {
2034       v = amat->a;
2035       for (i=0; i<amat->nz; i++) {
2036 #if defined(PETSC_USE_COMPLEX)
2037         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2038 #else
2039         sum += (*v)*(*v); v++;
2040 #endif
2041       }
2042       v = bmat->a;
2043       for (i=0; i<bmat->nz; i++) {
2044 #if defined(PETSC_USE_COMPLEX)
2045         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2046 #else
2047         sum += (*v)*(*v); v++;
2048 #endif
2049       }
2050       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2051       *norm = PetscSqrtReal(*norm);
2052     } else if (type == NORM_1) { /* max column norm */
2053       PetscReal *tmp,*tmp2;
2054       PetscInt  *jj,*garray = aij->garray;
2055       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2056       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2057       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2058       *norm = 0.0;
2059       v = amat->a; jj = amat->j;
2060       for (j=0; j<amat->nz; j++) {
2061         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
2062       }
2063       v = bmat->a; jj = bmat->j;
2064       for (j=0; j<bmat->nz; j++) {
2065         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2066       }
2067       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2068       for (j=0; j<mat->cmap->N; j++) {
2069         if (tmp2[j] > *norm) *norm = tmp2[j];
2070       }
2071       ierr = PetscFree(tmp);CHKERRQ(ierr);
2072       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2073     } else if (type == NORM_INFINITY) { /* max row norm */
2074       PetscReal ntemp = 0.0;
2075       for (j=0; j<aij->A->rmap->n; j++) {
2076         v = amat->a + amat->i[j];
2077         sum = 0.0;
2078         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2079           sum += PetscAbsScalar(*v); v++;
2080         }
2081         v = bmat->a + bmat->i[j];
2082         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2083           sum += PetscAbsScalar(*v); v++;
2084         }
2085         if (sum > ntemp) ntemp = sum;
2086       }
2087       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2088     } else {
2089       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2090     }
2091   }
2092   PetscFunctionReturn(0);
2093 }
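
/*
   Usage sketch (illustrative; A is an assembled MATMPIAIJ matrix): the supported norms of a
   parallel AIJ matrix are the Frobenius, one, and infinity norms, e.g.

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/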
2094 
2095 #undef __FUNCT__
2096 #define __FUNCT__ "MatTranspose_MPIAIJ"
2097 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2098 {
2099   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2100   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2101   PetscErrorCode ierr;
2102   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2103   PetscInt       cstart=A->cmap->rstart,ncol;
2104   Mat            B;
2105   MatScalar      *array;
2106 
2107   PetscFunctionBegin;
2108   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2109 
2110   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2111   ai = Aloc->i; aj = Aloc->j;
2112   bi = Bloc->i; bj = Bloc->j;
2113   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2114     PetscInt *d_nnz,*g_nnz,*o_nnz;
2115     PetscSFNode *oloc;
2116     PETSC_UNUSED PetscSF sf;
2117 
2118     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2119     /* compute d_nnz for preallocation */
2120     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2121     for (i=0; i<ai[ma]; i++) {
2122       d_nnz[aj[i]] ++;
2123       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2124     }
2125 #if defined(OMPI_MAJOR_VERSION) /* Open MPI Bug. https://bitbucket.org/petsc/petsc-dev/issue/9/implement-petscsf-without-one-sided */
2126     ierr = PetscMemcpy(o_nnz,d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2127 #else
2128     /* compute local off-diagonal contributions */
2129     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2130     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2131     /* map those to global */
2132     ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
2133     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,PETSC_NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2134     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2135     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2136     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2137     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2138 #endif
2139 
2140     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2141     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2142     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr);
2143     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2144     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2145     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2146 #if defined(OMPI_MAJOR_VERSION)
2147     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
2148 #endif
2149   } else {
2150     B = *matout;
2151     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2152     for (i=0; i<ai[ma]; i++){
2153       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2154     }
2155   }
2156 
2157   /* copy over the A part */
2158   array = Aloc->a;
2159   row = A->rmap->rstart;
2160   for (i=0; i<ma; i++) {
2161     ncol = ai[i+1]-ai[i];
2162     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2163     row++; array += ncol; aj += ncol;
2164   }
2165   aj = Aloc->j;
2166   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2167 
2168   /* copy over the B part */
2169   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2170   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2171   array = Bloc->a;
2172   row = A->rmap->rstart;
2173   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2174   cols_tmp = cols;
2175   for (i=0; i<mb; i++) {
2176     ncol = bi[i+1]-bi[i];
2177     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2178     row++; array += ncol; cols_tmp += ncol;
2179   }
2180   ierr = PetscFree(cols);CHKERRQ(ierr);
2181 
2182   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2183   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2184   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2185     *matout = B;
2186   } else {
2187     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2188   }
2189   PetscFunctionReturn(0);
2190 }
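
/*
   Usage sketch (illustrative; A is an assembled MATMPIAIJ matrix): an explicit parallel
   transpose is created with

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);

   while MAT_REUSE_MATRIX with *matout == A performs the in-place (square only) variant
   handled above.
*/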
2191 
2192 #undef __FUNCT__
2193 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2194 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2195 {
2196   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2197   Mat            a = aij->A,b = aij->B;
2198   PetscErrorCode ierr;
2199   PetscInt       s1,s2,s3;
2200 
2201   PetscFunctionBegin;
2202   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2203   if (rr) {
2204     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2205     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2206     /* Overlap communication with computation. */
2207     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2208   }
2209   if (ll) {
2210     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2211     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2212     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2213   }
2214   /* scale  the diagonal block */
2215   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2216 
2217   if (rr) {
2218     /* Do a scatter end and then right scale the off-diagonal block */
2219     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2220     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2221   }
2222 
2223   PetscFunctionReturn(0);
2224 }
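
/*
   Usage sketch (illustrative; l and r are assumed to be vectors conforming to the local row
   and column layout of the matrix): A = diag(l)*A*diag(r) is applied in place by

     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);

   Passing PETSC_NULL for l or r skips the corresponding scaling.
*/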
2225 
2226 #undef __FUNCT__
2227 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2228 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2229 {
2230   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2231   PetscErrorCode ierr;
2232 
2233   PetscFunctionBegin;
2234   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2235   PetscFunctionReturn(0);
2236 }
2237 
2238 #undef __FUNCT__
2239 #define __FUNCT__ "MatEqual_MPIAIJ"
2240 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2241 {
2242   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2243   Mat            a,b,c,d;
2244   PetscBool      flg;
2245   PetscErrorCode ierr;
2246 
2247   PetscFunctionBegin;
2248   a = matA->A; b = matA->B;
2249   c = matB->A; d = matB->B;
2250 
2251   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2252   if (flg) {
2253     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2254   }
2255   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2256   PetscFunctionReturn(0);
2257 }
2258 
2259 #undef __FUNCT__
2260 #define __FUNCT__ "MatCopy_MPIAIJ"
2261 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2262 {
2263   PetscErrorCode ierr;
2264   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2265   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2266 
2267   PetscFunctionBegin;
2268   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2269   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2270     /* because of the column compression in the off-processor part of the matrix a->B,
2271        the number of columns in a->B and b->B may be different, hence we cannot call
2272        the MatCopy() directly on the two parts. If need be, we can provide a more
2273        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2274        then copying the submatrices */
2275     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2276   } else {
2277     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2278     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2279   }
2280   PetscFunctionReturn(0);
2281 }
2282 
2283 #undef __FUNCT__
2284 #define __FUNCT__ "MatSetUp_MPIAIJ"
2285 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2286 {
2287   PetscErrorCode ierr;
2288 
2289   PetscFunctionBegin;
2290   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2296 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2297 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2298 {
2299   PetscInt          i,m=Y->rmap->N;
2300   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2301   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2302   const PetscInt    *xi = x->i,*yi = y->i;
2303 
2304   PetscFunctionBegin;
2305   /* Set the number of nonzeros in the new matrix */
2306   for (i=0; i<m; i++) {
2307     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2308     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2309     nnz[i] = 0;
2310     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2311       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2312       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2313       nnz[i]++;
2314     }
2315     for (; k<nzy; k++) nnz[i]++;
2316   }
2317   PetscFunctionReturn(0);
2318 }
2319 
2320 #undef __FUNCT__
2321 #define __FUNCT__ "MatAXPY_MPIAIJ"
2322 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2323 {
2324   PetscErrorCode ierr;
2325   PetscInt       i;
2326   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2327   PetscBLASInt   bnz,one=1;
2328   Mat_SeqAIJ     *x,*y;
2329 
2330   PetscFunctionBegin;
2331   if (str == SAME_NONZERO_PATTERN) {
2332     PetscScalar alpha = a;
2333     x = (Mat_SeqAIJ *)xx->A->data;
2334     y = (Mat_SeqAIJ *)yy->A->data;
2335     bnz = PetscBLASIntCast(x->nz);
2336     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2337     x = (Mat_SeqAIJ *)xx->B->data;
2338     y = (Mat_SeqAIJ *)yy->B->data;
2339     bnz = PetscBLASIntCast(x->nz);
2340     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2341   } else if (str == SUBSET_NONZERO_PATTERN) {
2342     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2343 
2344     x = (Mat_SeqAIJ *)xx->B->data;
2345     y = (Mat_SeqAIJ *)yy->B->data;
2346     if (y->xtoy && y->XtoY != xx->B) {
2347       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2348       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2349     }
2350     if (!y->xtoy) { /* get xtoy */
2351       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2352       y->XtoY = xx->B;
2353       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2354     }
2355     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2356   } else {
2357     Mat B;
2358     PetscInt *nnz_d,*nnz_o;
2359     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2360     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2361     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2362     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2363     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2364     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2365     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2366     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2367     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2368     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2369     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2370     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2371     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2372     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2373   }
2374   PetscFunctionReturn(0);
2375 }
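
/*
   Usage sketch (illustrative; X and Y are assembled MATMPIAIJ matrices with conforming sizes):
   Y = Y + a*X, with the structure flag selecting the fast paths above (SAME_NONZERO_PATTERN,
   SUBSET_NONZERO_PATTERN) or the general preallocate-and-copy path, e.g.

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/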
2376 
2377 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2378 
2379 #undef __FUNCT__
2380 #define __FUNCT__ "MatConjugate_MPIAIJ"
2381 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2382 {
2383 #if defined(PETSC_USE_COMPLEX)
2384   PetscErrorCode ierr;
2385   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2386 
2387   PetscFunctionBegin;
2388   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2389   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2390 #else
2391   PetscFunctionBegin;
2392 #endif
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #undef __FUNCT__
2397 #define __FUNCT__ "MatRealPart_MPIAIJ"
2398 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2399 {
2400   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2401   PetscErrorCode ierr;
2402 
2403   PetscFunctionBegin;
2404   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2405   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2406   PetscFunctionReturn(0);
2407 }
2408 
2409 #undef __FUNCT__
2410 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2411 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2412 {
2413   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2414   PetscErrorCode ierr;
2415 
2416   PetscFunctionBegin;
2417   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2418   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2419   PetscFunctionReturn(0);
2420 }
2421 
2422 #ifdef PETSC_HAVE_PBGL
2423 
2424 #include <boost/parallel/mpi/bsp_process_group.hpp>
2425 #include <boost/graph/distributed/ilu_default_graph.hpp>
2426 #include <boost/graph/distributed/ilu_0_block.hpp>
2427 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2428 #include <boost/graph/distributed/petsc/interface.hpp>
2429 #include <boost/multi_array.hpp>
2430 #include <boost/parallel/distributed_property_map.hpp>
2431 
2432 #undef __FUNCT__
2433 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2434 /*
2435   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2436 */
2437 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2438 {
2439   namespace petsc = boost::distributed::petsc;
2440 
2441   namespace graph_dist = boost::graph::distributed;
2442   using boost::graph::distributed::ilu_default::process_group_type;
2443   using boost::graph::ilu_permuted;
2444 
2445   PetscBool       row_identity, col_identity;
2446   PetscContainer  c;
2447   PetscInt        m, n, M, N;
2448   PetscErrorCode  ierr;
2449 
2450   PetscFunctionBegin;
2451   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2452   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2453   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2454   if (!row_identity || !col_identity) {
2455     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2456   }
2457 
2458   process_group_type pg;
2459   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2460   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2461   lgraph_type&   level_graph = *lgraph_p;
2462   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2463 
2464   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2465   ilu_permuted(level_graph);
2466 
2467   /* put together the new matrix */
2468   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2469   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2470   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2471   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2472   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr);
2473   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2474   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2475   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2476 
2477   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2478   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2479   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2480   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2481   PetscFunctionReturn(0);
2482 }
2483 
2484 #undef __FUNCT__
2485 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2486 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2487 {
2488   PetscFunctionBegin;
2489   PetscFunctionReturn(0);
2490 }
2491 
2492 #undef __FUNCT__
2493 #define __FUNCT__ "MatSolve_MPIAIJ"
2494 /*
2495   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2496 */
2497 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2498 {
2499   namespace graph_dist = boost::graph::distributed;
2500 
2501   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2502   lgraph_type*   lgraph_p;
2503   PetscContainer c;
2504   PetscErrorCode ierr;
2505 
2506   PetscFunctionBegin;
2507   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2508   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2509   ierr = VecCopy(b, x);CHKERRQ(ierr);
2510 
2511   PetscScalar* array_x;
2512   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2513   PetscInt sx;
2514   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2515 
2516   PetscScalar* array_b;
2517   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2518   PetscInt sb;
2519   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2520 
2521   lgraph_type&   level_graph = *lgraph_p;
2522   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2523 
2524   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2525   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2526                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2527 
2528   typedef boost::iterator_property_map<array_ref_type::iterator,
2529                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2530   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2531                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2532 
2533   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2534 
2535   PetscFunctionReturn(0);
2536 }
2537 #endif
2538 
2539 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2540   PetscInt       nzlocal,nsends,nrecvs;
2541   PetscMPIInt    *send_rank,*recv_rank;
2542   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2543   PetscScalar    *sbuf_a,**rbuf_a;
2544   PetscErrorCode (*Destroy)(Mat);
2545 } Mat_Redundant;
2546 
2547 #undef __FUNCT__
2548 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2549 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2550 {
2551   PetscErrorCode       ierr;
2552   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2553   PetscInt             i;
2554 
2555   PetscFunctionBegin;
2556   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2557   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2558   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2559   for (i=0; i<redund->nrecvs; i++){
2560     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2561     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2562   }
2563   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2564   ierr = PetscFree(redund);CHKERRQ(ierr);
2565   PetscFunctionReturn(0);
2566 }
2567 
2568 #undef __FUNCT__
2569 #define __FUNCT__ "MatDestroy_MatRedundant"
2570 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2571 {
2572   PetscErrorCode  ierr;
2573   PetscContainer  container;
2574   Mat_Redundant   *redund=PETSC_NULL;
2575 
2576   PetscFunctionBegin;
2577   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2578   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2579   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2580   A->ops->destroy = redund->Destroy;
2581   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2582   if (A->ops->destroy) {
2583     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2584   }
2585   PetscFunctionReturn(0);
2586 }
2587 
2588 #undef __FUNCT__
2589 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2590 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2591 {
2592   PetscMPIInt    rank,size;
2593   MPI_Comm       comm=((PetscObject)mat)->comm;
2594   PetscErrorCode ierr;
2595   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2596   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2597   PetscInt       *rowrange=mat->rmap->range;
2598   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2599   Mat            A=aij->A,B=aij->B,C=*matredundant;
2600   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2601   PetscScalar    *sbuf_a;
2602   PetscInt       nzlocal=a->nz+b->nz;
2603   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2604   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2605   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2606   MatScalar      *aworkA,*aworkB;
2607   PetscScalar    *vals;
2608   PetscMPIInt    tag1,tag2,tag3,imdex;
2609   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2610                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2611   MPI_Status     recv_status,*send_status;
2612   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2613   PetscInt       **rbuf_j=PETSC_NULL;
2614   PetscScalar    **rbuf_a=PETSC_NULL;
2615   Mat_Redundant  *redund=PETSC_NULL;
2616   PetscContainer container;
2617 
2618   PetscFunctionBegin;
2619   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2620   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2621 
2622   if (reuse == MAT_REUSE_MATRIX) {
2623     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2624     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2625     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2626     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2627     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2628     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2629     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2630     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2631 
2632     nsends    = redund->nsends;
2633     nrecvs    = redund->nrecvs;
2634     send_rank = redund->send_rank;
2635     recv_rank = redund->recv_rank;
2636     sbuf_nz   = redund->sbuf_nz;
2637     rbuf_nz   = redund->rbuf_nz;
2638     sbuf_j    = redund->sbuf_j;
2639     sbuf_a    = redund->sbuf_a;
2640     rbuf_j    = redund->rbuf_j;
2641     rbuf_a    = redund->rbuf_a;
2642   }
2643 
2644   if (reuse == MAT_INITIAL_MATRIX){
2645     PetscMPIInt  subrank,subsize;
2646     PetscInt     nleftover,np_subcomm;
2647     /* get the destination processors' id send_rank, nsends and nrecvs */
2648     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2649     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2650     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2651     np_subcomm = size/nsubcomm;
2652     nleftover  = size - nsubcomm*np_subcomm;
2653     nsends = 0; nrecvs = 0;
2654     for (i=0; i<size; i++){ /* i=rank*/
2655       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2656         send_rank[nsends] = i; nsends++;
2657         recv_rank[nrecvs++] = i;
2658       }
2659     }
2660     if (rank >= size - nleftover){/* this proc is a leftover processor */
2661       i = size-nleftover-1;
2662       j = 0;
2663       while (j < nsubcomm - nleftover){
2664         send_rank[nsends++] = i;
2665         i--; j++;
2666       }
2667     }
2668 
2669     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2670       for (i=0; i<nleftover; i++){
2671         recv_rank[nrecvs++] = size-nleftover+i;
2672       }
2673     }
2674 
2675     /* allocate sbuf_j, sbuf_a */
2676     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2677     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2678     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2679   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2680 
2681   /* copy mat's local entries into the buffers */
2682   if (reuse == MAT_INITIAL_MATRIX){
2683     rownz_max = 0;
2684     rptr = sbuf_j;
2685     cols = sbuf_j + rend-rstart + 1;
2686     vals = sbuf_a;
2687     rptr[0] = 0;
2688     for (i=0; i<rend-rstart; i++){
2689       row = i + rstart;
2690       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2691       ncols  = nzA + nzB;
2692       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2693       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2694       /* load the column indices for this row into cols */
2695       lwrite = 0;
2696       for (l=0; l<nzB; l++) {
2697         if ((ctmp = bmap[cworkB[l]]) < cstart){
2698           vals[lwrite]   = aworkB[l];
2699           cols[lwrite++] = ctmp;
2700         }
2701       }
2702       for (l=0; l<nzA; l++){
2703         vals[lwrite]   = aworkA[l];
2704         cols[lwrite++] = cstart + cworkA[l];
2705       }
2706       for (l=0; l<nzB; l++) {
2707         if ((ctmp = bmap[cworkB[l]]) >= cend){
2708           vals[lwrite]   = aworkB[l];
2709           cols[lwrite++] = ctmp;
2710         }
2711       }
2712       vals += ncols;
2713       cols += ncols;
2714       rptr[i+1] = rptr[i] + ncols;
2715       if (rownz_max < ncols) rownz_max = ncols;
2716     }
2717     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2718   } else { /* only copy matrix values into sbuf_a */
2719     rptr = sbuf_j;
2720     vals = sbuf_a;
2721     rptr[0] = 0;
2722     for (i=0; i<rend-rstart; i++){
2723       row = i + rstart;
2724       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2725       ncols  = nzA + nzB;
2726       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2727       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2728       lwrite = 0;
2729       for (l=0; l<nzB; l++) {
2730         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2731       }
2732       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2733       for (l=0; l<nzB; l++) {
2734         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2735       }
2736       vals += ncols;
2737       rptr[i+1] = rptr[i] + ncols;
2738     }
2739   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2740 
2741   /* send nzlocal to others, and recv other's nzlocal */
2742   /*--------------------------------------------------*/
2743   if (reuse == MAT_INITIAL_MATRIX){
2744     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2745     s_waits2 = s_waits3 + nsends;
2746     s_waits1 = s_waits2 + nsends;
2747     r_waits1 = s_waits1 + nsends;
2748     r_waits2 = r_waits1 + nrecvs;
2749     r_waits3 = r_waits2 + nrecvs;
2750   } else {
2751     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2752     r_waits3 = s_waits3 + nsends;
2753   }
2754 
2755   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2756   if (reuse == MAT_INITIAL_MATRIX){
2757     /* get new tags to keep the communication clean */
2758     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2759     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2760     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2761 
2762     /* post receives of other's nzlocal */
2763     for (i=0; i<nrecvs; i++){
2764       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2765     }
2766     /* send nzlocal to others */
2767     for (i=0; i<nsends; i++){
2768       sbuf_nz[i] = nzlocal;
2769       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2770     }
2771     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2772     count = nrecvs;
2773     while (count) {
2774       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2775       recv_rank[imdex] = recv_status.MPI_SOURCE;
2776       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2777       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2778 
2779       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2780       rbuf_nz[imdex] += i + 2;
2781       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2782       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2783       count--;
2784     }
2785     /* wait on sends of nzlocal */
2786     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2787     /* send mat->i,j to others, and recv from other's */
2788     /*------------------------------------------------*/
2789     for (i=0; i<nsends; i++){
2790       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2791       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2792     }
2793     /* wait on receives of mat->i,j */
2794     /*------------------------------*/
2795     count = nrecvs;
2796     while (count) {
2797       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2798       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2799       count--;
2800     }
2801     /* wait on sends of mat->i,j */
2802     /*---------------------------*/
2803     if (nsends) {
2804       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2805     }
2806   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2807 
2808   /* post receives, send and receive mat->a */
2809   /*----------------------------------------*/
2810   for (imdex=0; imdex<nrecvs; imdex++) {
2811     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2812   }
2813   for (i=0; i<nsends; i++){
2814     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2815   }
2816   count = nrecvs;
2817   while (count) {
2818     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2819     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2820     count--;
2821   }
2822   if (nsends) {
2823     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2824   }
2825 
2826   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2827 
2828   /* create redundant matrix */
2829   /*-------------------------*/
2830   if (reuse == MAT_INITIAL_MATRIX){
2831     /* compute rownz_max for preallocation */
2832     for (imdex=0; imdex<nrecvs; imdex++){
2833       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2834       rptr = rbuf_j[imdex];
2835       for (i=0; i<j; i++){
2836         ncols = rptr[i+1] - rptr[i];
2837         if (rownz_max < ncols) rownz_max = ncols;
2838       }
2839     }
2840 
2841     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2842     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2843     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr);
2844     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2845     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2846     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2847   } else {
2848     C = *matredundant;
2849   }
2850 
2851   /* insert local matrix entries */
2852   rptr = sbuf_j;
2853   cols = sbuf_j + rend-rstart + 1;
2854   vals = sbuf_a;
2855   for (i=0; i<rend-rstart; i++){
2856     row   = i + rstart;
2857     ncols = rptr[i+1] - rptr[i];
2858     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2859     vals += ncols;
2860     cols += ncols;
2861   }
2862   /* insert received matrix entries */
2863   for (imdex=0; imdex<nrecvs; imdex++){
2864     rstart = rowrange[recv_rank[imdex]];
2865     rend   = rowrange[recv_rank[imdex]+1];
2866     rptr = rbuf_j[imdex];
2867     cols = rbuf_j[imdex] + rend-rstart + 1;
2868     vals = rbuf_a[imdex];
2869     for (i=0; i<rend-rstart; i++){
2870       row   = i + rstart;
2871       ncols = rptr[i+1] - rptr[i];
2872       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2873       vals += ncols;
2874       cols += ncols;
2875     }
2876   }
2877   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2878   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2879   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2880   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2881   if (reuse == MAT_INITIAL_MATRIX) {
2882     PetscContainer container;
2883     *matredundant = C;
2884     /* create a supporting struct and attach it to C for reuse */
2885     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2886     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2887     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2888     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2889     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2890     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2891 
2892     redund->nzlocal = nzlocal;
2893     redund->nsends  = nsends;
2894     redund->nrecvs  = nrecvs;
2895     redund->send_rank = send_rank;
2896     redund->recv_rank = recv_rank;
2897     redund->sbuf_nz = sbuf_nz;
2898     redund->rbuf_nz = rbuf_nz;
2899     redund->sbuf_j  = sbuf_j;
2900     redund->sbuf_a  = sbuf_a;
2901     redund->rbuf_j  = rbuf_j;
2902     redund->rbuf_a  = rbuf_a;
2903 
2904     redund->Destroy = C->ops->destroy;
2905     C->ops->destroy = MatDestroy_MatRedundant;
2906   }
2907   PetscFunctionReturn(0);
2908 }
2909 
2910 #undef __FUNCT__
2911 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2912 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2913 {
2914   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2915   PetscErrorCode ierr;
2916   PetscInt       i,*idxb = 0;
2917   PetscScalar    *va,*vb;
2918   Vec            vtmp;
2919 
2920   PetscFunctionBegin;
2921   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2922   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2923   if (idx) {
2924     for (i=0; i<A->rmap->n; i++) {
2925       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2926     }
2927   }
2928 
2929   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2930   if (idx) {
2931     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2932   }
2933   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2934   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2935 
2936   for (i=0; i<A->rmap->n; i++){
2937     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2938       va[i] = vb[i];
2939       if (idx) idx[i] = a->garray[idxb[i]];
2940     }
2941   }
2942 
2943   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2944   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2945   ierr = PetscFree(idxb);CHKERRQ(ierr);
2946   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2947   PetscFunctionReturn(0);
2948 }
2949 
2950 #undef __FUNCT__
2951 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2952 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2953 {
2954   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2955   PetscErrorCode ierr;
2956   PetscInt       i,*idxb = 0;
2957   PetscScalar    *va,*vb;
2958   Vec            vtmp;
2959 
2960   PetscFunctionBegin;
2961   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2962   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2963   if (idx) {
2964     for (i=0; i<A->rmap->n; i++) {
2965       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2966     }
2967   }
2968 
2969   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2970   if (idx) {
2971     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2972   }
2973   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2974   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2975 
2976   for (i=0; i<A->rmap->n; i++){
2977     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2978       va[i] = vb[i];
2979       if (idx) idx[i] = a->garray[idxb[i]];
2980     }
2981   }
2982 
2983   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2984   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2985   ierr = PetscFree(idxb);CHKERRQ(ierr);
2986   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2987   PetscFunctionReturn(0);
2988 }
2989 
2990 #undef __FUNCT__
2991 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2992 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2993 {
2994   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2995   PetscInt       n      = A->rmap->n;
2996   PetscInt       cstart = A->cmap->rstart;
2997   PetscInt      *cmap   = mat->garray;
2998   PetscInt      *diagIdx, *offdiagIdx;
2999   Vec            diagV, offdiagV;
3000   PetscScalar   *a, *diagA, *offdiagA;
3001   PetscInt       r;
3002   PetscErrorCode ierr;
3003 
3004   PetscFunctionBegin;
3005   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3006   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3007   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3008   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3009   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3010   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3011   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3012   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3013   for (r = 0; r < n; ++r) {
3014     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3015       a[r]   = diagA[r];
3016       idx[r] = cstart + diagIdx[r];
3017     } else {
3018       a[r]   = offdiagA[r];
3019       idx[r] = cmap[offdiagIdx[r]];
3020     }
3021   }
3022   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3023   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3024   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3025   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3026   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3027   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3028   PetscFunctionReturn(0);
3029 }
3030 
3031 #undef __FUNCT__
3032 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3033 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3034 {
3035   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
3036   PetscInt       n      = A->rmap->n;
3037   PetscInt       cstart = A->cmap->rstart;
3038   PetscInt      *cmap   = mat->garray;
3039   PetscInt      *diagIdx, *offdiagIdx;
3040   Vec            diagV, offdiagV;
3041   PetscScalar   *a, *diagA, *offdiagA;
3042   PetscInt       r;
3043   PetscErrorCode ierr;
3044 
3045   PetscFunctionBegin;
3046   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3047   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3048   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3049   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3050   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3051   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3052   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3053   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3054   for (r = 0; r < n; ++r) {
3055     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3056       a[r]   = diagA[r];
3057       idx[r] = cstart + diagIdx[r];
3058     } else {
3059       a[r]   = offdiagA[r];
3060       idx[r] = cmap[offdiagIdx[r]];
3061     }
3062   }
3063   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3064   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3065   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3066   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3067   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3068   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3069   PetscFunctionReturn(0);
3070 }
3071 
3072 #undef __FUNCT__
3073 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3074 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3075 {
3076   PetscErrorCode ierr;
3077   Mat            *dummy;
3078 
3079   PetscFunctionBegin;
3080   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3081   *newmat = *dummy;
3082   ierr = PetscFree(dummy);CHKERRQ(ierr);
3083   PetscFunctionReturn(0);
3084 }
3085 
3086 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3087 
3088 #undef __FUNCT__
3089 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3090 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3091 {
3092   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
3093   PetscErrorCode ierr;
3094 
3095   PetscFunctionBegin;
3096   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3097   PetscFunctionReturn(0);
3098 }
3099 
3100 #undef __FUNCT__
3101 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3102 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3103 {
3104   PetscErrorCode ierr;
3105   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3106 
3107   PetscFunctionBegin;
3108   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3109   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3110   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3111   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3112   PetscFunctionReturn(0);
3113 }
3114 
3115 /* -------------------------------------------------------------------*/
3116 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3117        MatGetRow_MPIAIJ,
3118        MatRestoreRow_MPIAIJ,
3119        MatMult_MPIAIJ,
3120 /* 4*/ MatMultAdd_MPIAIJ,
3121        MatMultTranspose_MPIAIJ,
3122        MatMultTransposeAdd_MPIAIJ,
3123 #ifdef PETSC_HAVE_PBGL
3124        MatSolve_MPIAIJ,
3125 #else
3126        0,
3127 #endif
3128        0,
3129        0,
3130 /*10*/ 0,
3131        0,
3132        0,
3133        MatSOR_MPIAIJ,
3134        MatTranspose_MPIAIJ,
3135 /*15*/ MatGetInfo_MPIAIJ,
3136        MatEqual_MPIAIJ,
3137        MatGetDiagonal_MPIAIJ,
3138        MatDiagonalScale_MPIAIJ,
3139        MatNorm_MPIAIJ,
3140 /*20*/ MatAssemblyBegin_MPIAIJ,
3141        MatAssemblyEnd_MPIAIJ,
3142        MatSetOption_MPIAIJ,
3143        MatZeroEntries_MPIAIJ,
3144 /*24*/ MatZeroRows_MPIAIJ,
3145        0,
3146 #ifdef PETSC_HAVE_PBGL
3147        0,
3148 #else
3149        0,
3150 #endif
3151        0,
3152        0,
3153 /*29*/ MatSetUp_MPIAIJ,
3154 #ifdef PETSC_HAVE_PBGL
3155        0,
3156 #else
3157        0,
3158 #endif
3159        0,
3160        0,
3161        0,
3162 /*34*/ MatDuplicate_MPIAIJ,
3163        0,
3164        0,
3165        0,
3166        0,
3167 /*39*/ MatAXPY_MPIAIJ,
3168        MatGetSubMatrices_MPIAIJ,
3169        MatIncreaseOverlap_MPIAIJ,
3170        MatGetValues_MPIAIJ,
3171        MatCopy_MPIAIJ,
3172 /*44*/ MatGetRowMax_MPIAIJ,
3173        MatScale_MPIAIJ,
3174        0,
3175        0,
3176        MatZeroRowsColumns_MPIAIJ,
3177 /*49*/ MatSetRandom_MPIAIJ,
3178        0,
3179        0,
3180        0,
3181        0,
3182 /*54*/ MatFDColoringCreate_MPIAIJ,
3183        0,
3184        MatSetUnfactored_MPIAIJ,
3185        MatPermute_MPIAIJ,
3186        0,
3187 /*59*/ MatGetSubMatrix_MPIAIJ,
3188        MatDestroy_MPIAIJ,
3189        MatView_MPIAIJ,
3190        0,
3191        0,
3192 /*64*/ 0,
3193        0,
3194        0,
3195        0,
3196        0,
3197 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3198        MatGetRowMinAbs_MPIAIJ,
3199        0,
3200        MatSetColoring_MPIAIJ,
3201 #if defined(PETSC_HAVE_ADIC)
3202        MatSetValuesAdic_MPIAIJ,
3203 #else
3204        0,
3205 #endif
3206        MatSetValuesAdifor_MPIAIJ,
3207 /*75*/ MatFDColoringApply_AIJ,
3208        0,
3209        0,
3210        0,
3211        MatFindZeroDiagonals_MPIAIJ,
3212 /*80*/ 0,
3213        0,
3214        0,
3215 /*83*/ MatLoad_MPIAIJ,
3216        0,
3217        0,
3218        0,
3219        0,
3220        0,
3221 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3222        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3223        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3224        MatPtAP_MPIAIJ_MPIAIJ,
3225        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3226 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3227        0,
3228        0,
3229        0,
3230        0,
3231 /*99*/ 0,
3232        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3233        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3234        MatConjugate_MPIAIJ,
3235        0,
3236 /*104*/MatSetValuesRow_MPIAIJ,
3237        MatRealPart_MPIAIJ,
3238        MatImaginaryPart_MPIAIJ,
3239        0,
3240        0,
3241 /*109*/0,
3242        MatGetRedundantMatrix_MPIAIJ,
3243        MatGetRowMin_MPIAIJ,
3244        0,
3245        0,
3246 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3247        0,
3248        0,
3249        0,
3250        0,
3251 /*119*/0,
3252        0,
3253        0,
3254        0,
3255        MatGetMultiProcBlock_MPIAIJ,
3256 /*124*/MatFindNonzeroRows_MPIAIJ,
3257        MatGetColumnNorms_MPIAIJ,
3258        MatInvertBlockDiagonal_MPIAIJ,
3259        0,
3260        MatGetSubMatricesParallel_MPIAIJ,
3261 /*129*/0,
3262        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3263        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3264        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3265        0,
3266 /*134*/0,
3267        0,
3268        0,
3269        0,
3270        0
3271 };
3272 
3273 /* ----------------------------------------------------------------------------------------*/
3274 
3275 EXTERN_C_BEGIN
3276 #undef __FUNCT__
3277 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3278 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3279 {
3280   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3281   PetscErrorCode ierr;
3282 
3283   PetscFunctionBegin;
3284   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3285   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3286   PetscFunctionReturn(0);
3287 }
3288 EXTERN_C_END
3289 
3290 EXTERN_C_BEGIN
3291 #undef __FUNCT__
3292 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3293 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3294 {
3295   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3296   PetscErrorCode ierr;
3297 
3298   PetscFunctionBegin;
3299   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3300   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3301   PetscFunctionReturn(0);
3302 }
3303 EXTERN_C_END
3304 
3305 EXTERN_C_BEGIN
3306 #undef __FUNCT__
3307 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3308 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3309 {
3310   Mat_MPIAIJ     *b;
3311   PetscErrorCode ierr;
3312   PetscInt       i;
3313   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3314 
3315   PetscFunctionBegin;
3316   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3317   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3318   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3319   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3320   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3321   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3322 
3323   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3324   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3325   if (d_nnz) {
3326     for (i=0; i<B->rmap->n; i++) {
3327       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3328     }
3329   }
3330   if (o_nnz) {
3331     for (i=0; i<B->rmap->n; i++) {
3332       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3333     }
3334   }
3335   b = (Mat_MPIAIJ*)B->data;
3336 
3337   if (!B->preallocated) {
3338     /* Explicitly create 2 MATSEQAIJ matrices. */
3339     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3340     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3341     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3342     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3343     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3344     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3345     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3346     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3347     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3348     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3349   }
3350 
3351   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3352   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3353   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3354   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3355   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3356   B->preallocated = PETSC_TRUE;
3357   PetscFunctionReturn(0);
3358 }
3359 EXTERN_C_END
3360 
3361 #undef __FUNCT__
3362 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3363 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3364 {
3365   Mat            mat;
3366   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3367   PetscErrorCode ierr;
3368 
3369   PetscFunctionBegin;
3370   *newmat       = 0;
3371   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3372   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3373   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3374   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3375   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3376   a    = (Mat_MPIAIJ*)mat->data;
3377 
3378   mat->factortype    = matin->factortype;
3379   mat->rmap->bs      = matin->rmap->bs;
3380   mat->cmap->bs      = matin->cmap->bs;
3381   mat->assembled    = PETSC_TRUE;
3382   mat->insertmode   = NOT_SET_VALUES;
3383   mat->preallocated = PETSC_TRUE;
3384 
3385   a->size           = oldmat->size;
3386   a->rank           = oldmat->rank;
3387   a->donotstash     = oldmat->donotstash;
3388   a->roworiented    = oldmat->roworiented;
3389   a->rowindices     = 0;
3390   a->rowvalues      = 0;
3391   a->getrowactive   = PETSC_FALSE;
3392 
3393   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3394   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3395 
3396   if (oldmat->colmap) {
3397 #if defined (PETSC_USE_CTABLE)
3398     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3399 #else
3400     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3401     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3402     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3403 #endif
3404   } else a->colmap = 0;
3405   if (oldmat->garray) {
3406     PetscInt len;
3407     len  = oldmat->B->cmap->n;
3408     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3409     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3410     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3411   } else a->garray = 0;
3412 
3413   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3414   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3415   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3416   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3417   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3418   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3419   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3420   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3421   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3422   *newmat = mat;
3423   PetscFunctionReturn(0);
3424 }
3425 
3426 
3427 
3428 #undef __FUNCT__
3429 #define __FUNCT__ "MatLoad_MPIAIJ"
3430 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3431 {
3432   PetscScalar    *vals,*svals;
3433   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3434   PetscErrorCode ierr;
3435   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3436   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3437   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3438   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3439   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3440   int            fd;
3441   PetscInt       bs = 1;
3442 
3443   PetscFunctionBegin;
3444   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3445   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3446   if (!rank) {
3447     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3448     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3449     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3450   }
3451 
3452   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3453   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3454   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3455 
3456   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3457 
3458   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3459   M = header[1]; N = header[2];
3460   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3461   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3462   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3463 
3464   /* If global sizes are set, check if they are consistent with that given in the file */
3465   if (sizesset) {
3466     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3467   }
3468   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3469   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3470 
3471   /* determine ownership of all (block) rows */
3472   if ( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3473   if (newMat->rmap->n < 0 ) m    = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3474   else m = newMat->rmap->n; /* Set by user */
3475 
3476   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3477   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3478 
3479   /* First process needs enough room for process with most rows */
3480   if (!rank) {
3481     mmax = rowners[1];
3482     for (i=2; i<=size; i++) {
3483       mmax = PetscMax(mmax, rowners[i]);
3484     }
3485   } else mmax = m;
3486 
3487   rowners[0] = 0;
3488   for (i=2; i<=size; i++) {
3489     rowners[i] += rowners[i-1];
3490   }
3491   rstart = rowners[rank];
3492   rend   = rowners[rank+1];
3493 
3494   /* distribute row lengths to all processors */
3495   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3496   if (!rank) {
3497     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3498     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3499     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3500     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3501     for (j=0; j<m; j++) {
3502       procsnz[0] += ourlens[j];
3503     }
3504     for (i=1; i<size; i++) {
3505       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3506       /* calculate the number of nonzeros on each processor */
3507       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3508         procsnz[i] += rowlengths[j];
3509       }
3510       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3511     }
3512     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3513   } else {
3514     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3515   }
3516 
3517   if (!rank) {
3518     /* determine max buffer needed and allocate it */
3519     maxnz = 0;
3520     for (i=0; i<size; i++) {
3521       maxnz = PetscMax(maxnz,procsnz[i]);
3522     }
3523     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3524 
3525     /* read in my part of the matrix column indices  */
3526     nz   = procsnz[0];
3527     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3528     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3529 
3530     /* read in everyone else's and ship off */
3531     for (i=1; i<size; i++) {
3532       nz     = procsnz[i];
3533       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3534       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3535     }
3536     ierr = PetscFree(cols);CHKERRQ(ierr);
3537   } else {
3538     /* determine buffer space needed for message */
3539     nz = 0;
3540     for (i=0; i<m; i++) {
3541       nz += ourlens[i];
3542     }
3543     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3544 
3545     /* receive message of column indices*/
3546     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3547   }
3548 
3549   /* determine column ownership if matrix is not square */
3550   if (N != M) {
3551     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3552     else n = newMat->cmap->n;
3553     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3554     cstart = cend - n;
3555   } else {
3556     cstart = rstart;
3557     cend   = rend;
3558     n      = cend - cstart;
3559   }
3560 
3561   /* loop over local rows, determining number of off diagonal entries */
3562   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3563   jj = 0;
3564   for (i=0; i<m; i++) {
3565     for (j=0; j<ourlens[i]; j++) {
3566       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3567       jj++;
3568     }
3569   }
3570 
3571   for (i=0; i<m; i++) {
3572     ourlens[i] -= offlens[i];
3573   }
3574   if (!sizesset) {
3575     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3576   }
3577 
3578   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3579 
3580   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3581 
3582   for (i=0; i<m; i++) {
3583     ourlens[i] += offlens[i];
3584   }
3585 
3586   if (!rank) {
3587     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3588 
3589     /* read in my part of the matrix numerical values  */
3590     nz   = procsnz[0];
3591     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3592 
3593     /* insert into matrix */
3594     jj      = rstart;
3595     smycols = mycols;
3596     svals   = vals;
3597     for (i=0; i<m; i++) {
3598       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3599       smycols += ourlens[i];
3600       svals   += ourlens[i];
3601       jj++;
3602     }
3603 
3604     /* read in other processors and ship out */
3605     for (i=1; i<size; i++) {
3606       nz     = procsnz[i];
3607       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3608       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3609     }
3610     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3611   } else {
3612     /* receive numeric values */
3613     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3614 
3615     /* receive message of values*/
3616     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3617 
3618     /* insert into matrix */
3619     jj      = rstart;
3620     smycols = mycols;
3621     svals   = vals;
3622     for (i=0; i<m; i++) {
3623       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3624       smycols += ourlens[i];
3625       svals   += ourlens[i];
3626       jj++;
3627     }
3628   }
3629   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3630   ierr = PetscFree(vals);CHKERRQ(ierr);
3631   ierr = PetscFree(mycols);CHKERRQ(ierr);
3632   ierr = PetscFree(rowners);CHKERRQ(ierr);
3633   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3634   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3635 
3636   PetscFunctionReturn(0);
3637 }
3638 
3639 #undef __FUNCT__
3640 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3641 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3642 {
3643   PetscErrorCode ierr;
3644   IS             iscol_local;
3645   PetscInt       csize;
3646 
3647   PetscFunctionBegin;
3648   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3649   if (call == MAT_REUSE_MATRIX) {
3650     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3651     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3652   } else {
3653     PetscInt cbs;
3654     ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr);
3655     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3656     ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr);
3657   }
3658   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3659   if (call == MAT_INITIAL_MATRIX) {
3660     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3661     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3662   }
3663   PetscFunctionReturn(0);
3664 }
3665 
3666 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3667 #undef __FUNCT__
3668 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3669 /*
3670     Not great since it makes two copies of the submatrix: first a SeqAIJ
3671   built locally, and then the end result assembled by concatenating the local matrices.
3672   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3673 
3674   Note: This requires a sequential iscol with all indices.
3675 */
3676 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3677 {
3678   PetscErrorCode ierr;
3679   PetscMPIInt    rank,size;
3680   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3681   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3682   PetscBool      allcolumns, colflag;
3683   Mat            M,Mreuse;
3684   MatScalar      *vwork,*aa;
3685   MPI_Comm       comm = ((PetscObject)mat)->comm;
3686   Mat_SeqAIJ     *aij;
3687 
3688 
3689   PetscFunctionBegin;
3690   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3691   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3692 
3693   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3694   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3695   if (colflag && ncol == mat->cmap->N){
3696     allcolumns = PETSC_TRUE;
3697   } else {
3698     allcolumns = PETSC_FALSE;
3699   }
3700   if (call ==  MAT_REUSE_MATRIX) {
3701     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3702     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3703     ierr  = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3704   } else {
3705     ierr   = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3706   }
3707 
3708   /*
3709       m - number of local rows
3710       n - number of columns (same on all processors)
3711       rstart - first row in new global matrix generated
3712   */
3713   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3714   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3715   if (call == MAT_INITIAL_MATRIX) {
3716     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3717     ii  = aij->i;
3718     jj  = aij->j;
3719 
3720     /*
3721         Determine the number of non-zeros in the diagonal and off-diagonal
3722         portions of the matrix in order to do correct preallocation
3723     */
3724 
3725     /* first get start and end of "diagonal" columns */
3726     if (csize == PETSC_DECIDE) {
3727       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3728       if (mglobal == n) { /* square matrix */
3729 	nlocal = m;
3730       } else {
3731         nlocal = n/size + ((n % size) > rank);
3732       }
3733     } else {
3734       nlocal = csize;
3735     }
3736     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3737     rstart = rend - nlocal;
3738     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3739 
3740     /* next, compute all the lengths */
3741     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3742     olens = dlens + m;
3743     for (i=0; i<m; i++) {
3744       jend = ii[i+1] - ii[i];
3745       olen = 0;
3746       dlen = 0;
3747       for (j=0; j<jend; j++) {
3748         if (*jj < rstart || *jj >= rend) olen++;
3749         else dlen++;
3750         jj++;
3751       }
3752       olens[i] = olen;
3753       dlens[i] = dlen;
3754     }
3755     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3756     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3757     ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr);
3758     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3759     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3760     ierr = PetscFree(dlens);CHKERRQ(ierr);
3761   } else {
3762     PetscInt ml,nl;
3763 
3764     M = *newmat;
3765     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3766     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3767     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3768     /*
3769          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3770        rather than the slower MatSetValues().
3771     */
3772     M->was_assembled = PETSC_TRUE;
3773     M->assembled     = PETSC_FALSE;
3774   }
3775   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3776   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3777   ii  = aij->i;
3778   jj  = aij->j;
3779   aa  = aij->a;
3780   for (i=0; i<m; i++) {
3781     row   = rstart + i;
3782     nz    = ii[i+1] - ii[i];
3783     cwork = jj;     jj += nz;
3784     vwork = aa;     aa += nz;
3785     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3786   }
3787 
3788   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3789   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3790   *newmat = M;
3791 
3792   /* save submatrix used in processor for next request */
3793   if (call ==  MAT_INITIAL_MATRIX) {
3794     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3795     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3796   }
3797 
3798   PetscFunctionReturn(0);
3799 }
3800 
3801 EXTERN_C_BEGIN
3802 #undef __FUNCT__
3803 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3804 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3805 {
3806   PetscInt       m,cstart, cend,j,nnz,i,d;
3807   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3808   const PetscInt *JJ;
3809   PetscScalar    *values;
3810   PetscErrorCode ierr;
3811 
3812   PetscFunctionBegin;
3813   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3814 
3815   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3816   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3817   m      = B->rmap->n;
3818   cstart = B->cmap->rstart;
3819   cend   = B->cmap->rend;
3820   rstart = B->rmap->rstart;
3821 
3822   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3823 
3824 #if defined(PETSC_USE_DEBUG)
3825   for (i=0; i<m; i++) {
3826     nnz     = Ii[i+1]- Ii[i];
3827     JJ      = J + Ii[i];
3828     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3829     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3830     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3831   }
3832 #endif
3833 
3834   for (i=0; i<m; i++) {
3835     nnz     = Ii[i+1]- Ii[i];
3836     JJ      = J + Ii[i];
3837     nnz_max = PetscMax(nnz_max,nnz);
3838     d       = 0;
3839     for (j=0; j<nnz; j++) {
3840       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3841     }
3842     d_nnz[i] = d;
3843     o_nnz[i] = nnz - d;
3844   }
3845   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3846   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3847 
3848   if (v) values = (PetscScalar*)v;
3849   else {
3850     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3851     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3852   }
3853 
3854   for (i=0; i<m; i++) {
3855     ii   = i + rstart;
3856     nnz  = Ii[i+1]- Ii[i];
3857     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3858   }
3859   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3860   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3861 
3862   if (!v) {
3863     ierr = PetscFree(values);CHKERRQ(ierr);
3864   }
3865   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3866   PetscFunctionReturn(0);
3867 }
3868 EXTERN_C_END
3869 
3870 #undef __FUNCT__
3871 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3872 /*@
3873    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3874    (the default parallel PETSc format).
3875 
3876    Collective on MPI_Comm
3877 
3878    Input Parameters:
3879 +  B - the matrix
3880 .  i - the indices into j for the start of each local row (starts with zero)
3881 .  j - the column indices for each local row (starts with zero)
3882 -  v - optional values in the matrix
3883 
3884    Level: developer
3885 
3886    Notes:
3887        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3888      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3889      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3890 
3891        The i and j indices are 0-based, and the i indices are offsets into the local j array.
3892 
3893        The format used for the sparse matrix input is equivalent to a
3894     row-major ordering, i.e., for the following matrix, the expected input data is
3895     as shown:
3896 
3897         1 0 0
3898         2 0 3     P0
3899        -------
3900         4 5 6     P1
3901 
3902      Process0 [P0]: rows_owned=[0,1]
3903         i =  {0,1,3}  [size = nrow+1  = 2+1]
3904         j =  {0,0,2}  [size = nz = 3]
3905         v =  {1,2,3}  [size = nz = 3]
3906 
3907      Process1 [P1]: rows_owned=[2]
3908         i =  {0,3}    [size = nrow+1  = 1+1]
3909         j =  {0,1,2}  [size = nz = 3]
3910         v =  {4,5,6}  [size = nz = 3]
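
     As a minimal calling sketch (assuming comm is a communicator with two processes
     and ierr is a declared PetscErrorCode), process 0 above could build the matrix with:

.vb
     Mat         B;
     PetscInt    i[]  = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[]  = {1.0,2.0,3.0};
     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve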
3911 
3912 .keywords: matrix, aij, compressed row, sparse, parallel
3913 
3914 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3915           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3916 @*/
3917 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3918 {
3919   PetscErrorCode ierr;
3920 
3921   PetscFunctionBegin;
3922   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3923   PetscFunctionReturn(0);
3924 }
3925 
3926 #undef __FUNCT__
3927 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3928 /*@C
3929    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3930    (the default parallel PETSc format).  For good matrix assembly performance
3931    the user should preallocate the matrix storage by setting the parameters
3932    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3933    performance can be increased by more than a factor of 50.
3934 
3935    Collective on MPI_Comm
3936 
3937    Input Parameters:
3938 +  A - the matrix
3939 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3940            (same value is used for all local rows)
3941 .  d_nnz - array containing the number of nonzeros in the various rows of the
3942            DIAGONAL portion of the local submatrix (possibly different for each row)
3943            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3944            The size of this array is equal to the number of local rows, i.e 'm'.
3945            For matrices that will be factored, you must leave room for (and set)
3946            the diagonal entry even if it is zero.
3947 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3948            submatrix (same value is used for all local rows).
3949 -  o_nnz - array containing the number of nonzeros in the various rows of the
3950            OFF-DIAGONAL portion of the local submatrix (possibly different for
3951            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3952            structure. The size of this array is equal to the number
3953            of local rows, i.e 'm'.
3954 
3955    If the *_nnz parameter is given then the *_nz parameter is ignored
3956 
3957    The AIJ format (also called the Yale sparse matrix format or
3958    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3959    storage.  The stored row and column indices begin with zero.
3960    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3961 
3962    The parallel matrix is partitioned such that the first m0 rows belong to
3963    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3964    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
3965 
3966    The DIAGONAL portion of the local submatrix of a processor can be defined
3967    as the submatrix which is obtained by extracting the part corresponding to
3968    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3969    first row that belongs to the processor, r2 is the last row belonging to
3970    this processor, and c1-c2 is the range of indices of the local part of a
3971    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3972    common case of a square matrix, the row and column ranges are the same and
3973    the DIAGONAL part is also square. The remaining portion of the local
3974    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3975 
3976    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3977 
3978    You can call MatGetInfo() to get information on how effective the preallocation was,
3979    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3980    You can also run with the option -info and look for messages with the string
3981    malloc in them to see if additional memory allocation was needed.
3982 
3983    Example usage:
3984 
3985    Consider the following 8x8 matrix with 34 non-zero values that is
3986    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3987    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3988    as follows:
3989 
3990 .vb
3991             1  2  0  |  0  3  0  |  0  4
3992     Proc0   0  5  6  |  7  0  0  |  8  0
3993             9  0 10  | 11  0  0  | 12  0
3994     -------------------------------------
3995            13  0 14  | 15 16 17  |  0  0
3996     Proc1   0 18  0  | 19 20 21  |  0  0
3997             0  0  0  | 22 23  0  | 24  0
3998     -------------------------------------
3999     Proc2  25 26 27  |  0  0 28  | 29  0
4000            30  0  0  | 31 32 33  |  0 34
4001 .ve
4002 
4003    This can be represented as a collection of submatrices as:
4004 
4005 .vb
4006       A B C
4007       D E F
4008       G H I
4009 .ve
4010 
4011    Where the submatrices A,B,C are owned by proc0, D,E,F are
4012    owned by proc1, G,H,I are owned by proc2.
4013 
4014    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4015    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4016    The 'M','N' parameters are 8,8, and have the same values on all procs.
4017 
4018    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4019    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4020    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4021    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4022    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4023    matrix, and [DF] as another SeqAIJ matrix.
4024 
4025    When d_nz, o_nz parameters are specified, d_nz storage elements are
4026    allocated for every row of the local diagonal submatrix, and o_nz
4027    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4028    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4029    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4030    In this case, the values of d_nz,o_nz are:
4031 .vb
4032      proc0 : d_nz = 2, o_nz = 2
4033      proc1 : d_nz = 3, o_nz = 2
4034      proc2 : d_nz = 1, o_nz = 4
4035 .ve
4036    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4037    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4038    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4039    34 values.
4040 
4041    When d_nnz, o_nnz parameters are specified, the storage is specified
4042    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4043    In the above case the values for d_nnz,o_nnz are:
4044 .vb
4045      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4046      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4047      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4048 .ve
4049    Here the space allocated is the sum of all the above values, i.e. 34, and
4050    hence the preallocation is perfect.
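
   As an illustrative sketch (not from a PETSc example), proc1 in the example above could set
   the per-row preallocation as follows, assuming B was already created with MatCreate(),
   MatSetSizes() and MatSetType(B,MATMPIAIJ):
.vb
     PetscInt d_nnz[3] = {3,3,2};  /* nonzeros per local row of the DIAGONAL block [E]      */
     PetscInt o_nnz[3] = {2,1,1};  /* nonzeros per local row of the OFF-DIAGONAL block [DF] */
     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); /* d_nz,o_nz (the 0s) are ignored */
.ve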
4051 
4052    Level: intermediate
4053 
4054 .keywords: matrix, aij, compressed row, sparse, parallel
4055 
4056 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4057           MPIAIJ, MatGetInfo()
4058 @*/
4059 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4060 {
4061   PetscErrorCode ierr;
4062 
4063   PetscFunctionBegin;
4064   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4065   PetscValidType(B,1);
4066   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4067   PetscFunctionReturn(0);
4068 }
4069 
4070 #undef __FUNCT__
4071 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4072 /*@
4073      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
4074          rows in standard CSR format.
4075 
4076    Collective on MPI_Comm
4077 
4078    Input Parameters:
4079 +  comm - MPI communicator
4080 .  m - number of local rows (Cannot be PETSC_DECIDE)
4081 .  n - This value should be the same as the local size used in creating the
4082        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4083        calculated if N is given). For square matrices n is almost always m.
4084 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4085 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4086 .   i - row indices
4087 .   j - column indices
4088 -   a - matrix values
4089 
4090    Output Parameter:
4091 .   mat - the matrix
4092 
4093    Level: intermediate
4094 
4095    Notes:
4096        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4097      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4098      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4099 
4100        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4101 
4102        The format which is used for the sparse matrix input is equivalent to a
4103     row-major ordering, i.e. for the following matrix, the input data expected is
4104     as shown:
4105 
4106         1 0 0
4107         2 0 3     P0
4108        -------
4109         4 5 6     P1
4110 
4111      Process0 [P0]: rows_owned=[0,1]
4112         i =  {0,1,3}  [size = nrow+1  = 2+1]
4113         j =  {0,0,2}  [size = nz = 3]
4114         v =  {1,2,3}  [size = nz = 3]
4115 
4116      Process1 [P1]: rows_owned=[2]
4117         i =  {0,3}    [size = nrow+1  = 1+1]
4118         j =  {0,1,2}  [size = nz = 3]
4119         v =  {4,5,6}  [size = nz = 3]
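
     As an illustrative sketch (not from a PETSc example), process 0 above would call the routine
     with the arrays shown; the arrays are copied, so they may be freed afterwards, and comm is
     assumed to be a communicator containing both processes:
.vb
     PetscInt    i[] = {0,1,3};   /* row pointers for the 2 local rows     */
     PetscInt    j[] = {0,0,2};   /* global column indices of the 3 values */
     PetscScalar v[] = {1,2,3};
     Mat         A;
     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
.ve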
4120 
4121 .keywords: matrix, aij, compressed row, sparse, parallel
4122 
4123 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4124           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4125 @*/
4126 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4127 {
4128   PetscErrorCode ierr;
4129 
4130   PetscFunctionBegin;
4131   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4132   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4133   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4134   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4135   /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */
4136   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4137   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4138   PetscFunctionReturn(0);
4139 }
4140 
4141 #undef __FUNCT__
4142 #define __FUNCT__ "MatCreateAIJ"
4143 /*@C
4144    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4145    (the default parallel PETSc format).  For good matrix assembly performance
4146    the user should preallocate the matrix storage by setting the parameters
4147    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4148    performance can be increased by more than a factor of 50.
4149 
4150    Collective on MPI_Comm
4151 
4152    Input Parameters:
4153 +  comm - MPI communicator
4154 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4155            This value should be the same as the local size used in creating the
4156            y vector for the matrix-vector product y = Ax.
4157 .  n - This value should be the same as the local size used in creating the
4158        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4159        calculated if N is given). For square matrices n is almost always m.
4160 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4161 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4162 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4163            (same value is used for all local rows)
4164 .  d_nnz - array containing the number of nonzeros in the various rows of the
4165            DIAGONAL portion of the local submatrix (possibly different for each row)
4166            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4167            The size of this array is equal to the number of local rows, i.e. 'm'.
4168 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4169            submatrix (same value is used for all local rows).
4170 -  o_nnz - array containing the number of nonzeros in the various rows of the
4171            OFF-DIAGONAL portion of the local submatrix (possibly different for
4172            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4173            structure. The size of this array is equal to the number
4174            of local rows, i.e. 'm'.
4175 
4176    Output Parameter:
4177 .  A - the matrix
4178 
4179    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4180    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4181    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4182 
4183    Notes:
4184    If the *_nnz parameter is given then the *_nz parameter is ignored
4185 
4186    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4187    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4188    storage requirements for this matrix.
4189 
4190    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4191    processor then it must be used on all processors that share the object for
4192    that argument.
4193 
4194    The user MUST specify either the local or global matrix dimensions
4195    (possibly both).
4196 
4197    The parallel matrix is partitioned across processors such that the
4198    first m0 rows belong to process 0, the next m1 rows belong to
4199    process 1, the next m2 rows belong to process 2, etc., where
4200    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4201    values corresponding to an [m x N] submatrix.
4202 
4203    The columns are logically partitioned with the n0 columns belonging
4204    to the 0th partition, the next n1 columns belonging to the next
4205    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4206 
4207    The DIAGONAL portion of the local submatrix on any given processor
4208    is the submatrix corresponding to the rows and columns m,n
4209    owned by the given processor, i.e. the diagonal submatrix on
4210    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4211    etc. The remaining portion of the local submatrix [m x (N-n)]
4212    constitutes the OFF-DIAGONAL portion. The example below better
4213    illustrates this concept.
4214 
4215    For a square global matrix we define each processor's diagonal portion
4216    to be its local rows and the corresponding columns (a square submatrix);
4217    each processor's off-diagonal portion encompasses the remainder of the
4218    local matrix (a rectangular submatrix).
4219 
4220    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4221 
4222    When calling this routine with a single process communicator, a matrix of
4223    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4224    type of communicator, use the construction mechanism:
4225      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4226 
4227    By default, this format uses inodes (identical nodes) when possible.
4228    We search for consecutive rows with the same nonzero structure, thereby
4229    reusing matrix information to achieve increased efficiency.
4230 
4231    Options Database Keys:
4232 +  -mat_no_inode  - Do not use inodes
4233 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4234 -  -mat_aij_oneindex - Internally use indexing starting at 1
4235         rather than 0.  Note that when calling MatSetValues(),
4236         the user still MUST index entries starting at 0!
4237 
4238 
4239    Example usage:
4240 
4241    Consider the following 8x8 matrix with 34 non-zero values that is
4242    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4243    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4244    as follows:
4245 
4246 .vb
4247             1  2  0  |  0  3  0  |  0  4
4248     Proc0   0  5  6  |  7  0  0  |  8  0
4249             9  0 10  | 11  0  0  | 12  0
4250     -------------------------------------
4251            13  0 14  | 15 16 17  |  0  0
4252     Proc1   0 18  0  | 19 20 21  |  0  0
4253             0  0  0  | 22 23  0  | 24  0
4254     -------------------------------------
4255     Proc2  25 26 27  |  0  0 28  | 29  0
4256            30  0  0  | 31 32 33  |  0 34
4257 .ve
4258 
4259    This can be represented as a collection of submatrices as:
4260 
4261 .vb
4262       A B C
4263       D E F
4264       G H I
4265 .ve
4266 
4267    Where the submatrices A,B,C are owned by proc0, D,E,F are
4268    owned by proc1, G,H,I are owned by proc2.
4269 
4270    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4271    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4272    The 'M','N' parameters are 8,8, and have the same values on all procs.
4273 
4274    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4275    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4276    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4277    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4278    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4279    matrix, and [DF] as another SeqAIJ matrix.
4280 
4281    When d_nz, o_nz parameters are specified, d_nz storage elements are
4282    allocated for every row of the local diagonal submatrix, and o_nz
4283    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4284    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4285    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4286    In this case, the values of d_nz,o_nz are:
4287 .vb
4288      proc0 : d_nz = 2, o_nz = 2
4289      proc1 : d_nz = 3, o_nz = 2
4290      proc2 : d_nz = 1, o_nz = 4
4291 .ve
4292    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4293    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4294    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4295    34 values.
4296 
4297    When d_nnz, o_nnz parameters are specified, the storage is specified
4298    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4299    In the above case the values for d_nnz,o_nnz are:
4300 .vb
4301      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4302      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4303      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4304 .ve
4305    Here the space allocated is the sum of all the above values, i.e. 34, and
4306    hence the preallocation is perfect.
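
   As an illustrative sketch (not from a PETSc example) of this collective call, proc1 in the
   example above would pass its local sizes and the per-row counts given above:
.vb
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     Mat      C;
     ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&C);CHKERRQ(ierr);
.ve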
4307 
4308    Level: intermediate
4309 
4310 .keywords: matrix, aij, compressed row, sparse, parallel
4311 
4312 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4313           MPIAIJ, MatCreateMPIAIJWithArrays()
4314 @*/
4315 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4316 {
4317   PetscErrorCode ierr;
4318   PetscMPIInt    size;
4319 
4320   PetscFunctionBegin;
4321   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4322   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4323   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4324   if (size > 1) {
4325     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4326     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4327   } else {
4328     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4329     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4330   }
4331   PetscFunctionReturn(0);
4332 }
4333 
4334 #undef __FUNCT__
4335 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4336 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4337 {
4338   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4339 
4340   PetscFunctionBegin;
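  /* a->A is the local DIAGONAL block and a->B the local OFF-DIAGONAL block, both stored as SeqAIJ;
     a->garray maps the compressed local column numbering of a->B to global column indices */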
4341   *Ad     = a->A;
4342   *Ao     = a->B;
4343   *colmap = a->garray;
4344   PetscFunctionReturn(0);
4345 }
4346 
4347 #undef __FUNCT__
4348 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4349 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4350 {
4351   PetscErrorCode ierr;
4352   PetscInt       i;
4353   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4354 
4355   PetscFunctionBegin;
4356   if (coloring->ctype == IS_COLORING_GLOBAL) {
4357     ISColoringValue *allcolors,*colors;
4358     ISColoring      ocoloring;
4359 
4360     /* set coloring for diagonal portion */
4361     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4362 
4363     /* set coloring for off-diagonal portion */
4364     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4365     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4366     for (i=0; i<a->B->cmap->n; i++) {
4367       colors[i] = allcolors[a->garray[i]];
4368     }
4369     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4370     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4371     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4372     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4373   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4374     ISColoringValue *colors;
4375     PetscInt        *larray;
4376     ISColoring      ocoloring;
4377 
4378     /* set coloring for diagonal portion */
4379     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4380     for (i=0; i<a->A->cmap->n; i++) {
4381       larray[i] = i + A->cmap->rstart;
4382     }
4383     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4384     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4385     for (i=0; i<a->A->cmap->n; i++) {
4386       colors[i] = coloring->colors[larray[i]];
4387     }
4388     ierr = PetscFree(larray);CHKERRQ(ierr);
4389     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4390     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4391     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4392 
4393     /* set coloring for off-diagonal portion */
4394     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4395     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4396     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4397     for (i=0; i<a->B->cmap->n; i++) {
4398       colors[i] = coloring->colors[larray[i]];
4399     }
4400     ierr = PetscFree(larray);CHKERRQ(ierr);
4401     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4402     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4403     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4404   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4405 
4406   PetscFunctionReturn(0);
4407 }
4408 
4409 #if defined(PETSC_HAVE_ADIC)
4410 #undef __FUNCT__
4411 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4412 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4413 {
4414   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4415   PetscErrorCode ierr;
4416 
4417   PetscFunctionBegin;
4418   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4419   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4420   PetscFunctionReturn(0);
4421 }
4422 #endif
4423 
4424 #undef __FUNCT__
4425 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4426 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4427 {
4428   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4429   PetscErrorCode ierr;
4430 
4431   PetscFunctionBegin;
4432   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4433   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4434   PetscFunctionReturn(0);
4435 }
4436 
4437 #undef __FUNCT__
4438 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4439 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4440 {
4441   PetscErrorCode ierr;
4442   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4443   PetscInt       *indx;
4444 
4445   PetscFunctionBegin;
4446   /* This routine will ONLY return MPIAIJ type matrix */
4447   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4448   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4449   if (n == PETSC_DECIDE){
4450     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4451   }
4452   /* Check sum(n) = N */
4453   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4454   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4455 
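  /* inclusive prefix sum of the local row counts; subtracting m gives this process's first global row */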
4456   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4457   rstart -= m;
4458 
4459   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4460   for (i=0;i<m;i++) {
4461     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4462     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4463     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4464   }
4465 
4466   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4467   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr);
4468   ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr);
4469   ierr = MatSetType(*outmat,MATMPIAIJ);  CHKERRQ(ierr);
4470   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4471   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4472   PetscFunctionReturn(0);
4473 }
4474 
4475 #undef __FUNCT__
4476 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4477 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4478 {
4479   PetscErrorCode ierr;
4480   PetscInt       m,N,i,rstart,nnz,Ii;
4481   PetscInt       *indx;
4482   PetscScalar    *values;
4483 
4484   PetscFunctionBegin;
4485   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4486   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4487   for (i=0;i<m;i++) {
4488     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4489     Ii    = i + rstart;
4490     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4491     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4492   }
4493   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4494   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4495   PetscFunctionReturn(0);
4496 }
4497 
4498 #undef __FUNCT__
4499 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4500 /*@
4501       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4502                  matrices from each processor
4503 
4504     Collective on MPI_Comm
4505 
4506    Input Parameters:
4507 +    comm - the communicator the parallel matrix will live on
4508 .    inmat - the input sequential matrices
4509 .    n - number of local columns (or PETSC_DECIDE)
4510 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4511 
4512    Output Parameter:
4513 .    outmat - the parallel matrix generated
4514 
4515     Level: advanced
4516 
4517    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4518 
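   As an illustrative sketch (not from a PETSc example), where seqpart is the SEQAIJ piece
   assembled on each process and comm is the target communicator:
.vb
     Mat big;
     ierr = MatCreateMPIAIJConcatenateSeqAIJ(comm,seqpart,PETSC_DECIDE,MAT_INITIAL_MATRIX,&big);CHKERRQ(ierr);
.ve
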
4519 @*/
4520 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4521 {
4522   PetscErrorCode ierr;
4523 
4524   PetscFunctionBegin;
4525   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4526   if (scall == MAT_INITIAL_MATRIX){
4527     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4528   }
4529   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4530   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4531   PetscFunctionReturn(0);
4532 }
4533 
4534 #undef __FUNCT__
4535 #define __FUNCT__ "MatFileSplit"
4536 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4537 {
4538   PetscErrorCode    ierr;
4539   PetscMPIInt       rank;
4540   PetscInt          m,N,i,rstart,nnz;
4541   size_t            len;
4542   const PetscInt    *indx;
4543   PetscViewer       out;
4544   char              *name;
4545   Mat               B;
4546   const PetscScalar *values;
4547 
4548   PetscFunctionBegin;
4549   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4550   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4551   /* Should this be the type of the diagonal block of A? */
4552   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4553   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4554   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4555   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4556   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4557   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4558   for (i=0;i<m;i++) {
4559     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4560     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4561     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4562   }
4563   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4564   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4565 
4566   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4567   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4568   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4569   sprintf(name,"%s.%d",outfile,rank);
4570   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4571   ierr = PetscFree(name);
4572   ierr = MatView(B,out);CHKERRQ(ierr);
4573   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4574   ierr = MatDestroy(&B);CHKERRQ(ierr);
4575   PetscFunctionReturn(0);
4576 }
4577 
4578 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4579 #undef __FUNCT__
4580 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4581 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4582 {
4583   PetscErrorCode       ierr;
4584   Mat_Merge_SeqsToMPI  *merge;
4585   PetscContainer       container;
4586 
4587   PetscFunctionBegin;
4588   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4589   if (container) {
4590     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4591     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4592     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4593     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4594     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4595     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4596     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4597     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4598     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4599     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4600     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4601     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4602     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4603     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4604     ierr = PetscFree(merge);CHKERRQ(ierr);
4605     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4606   }
4607   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4608   PetscFunctionReturn(0);
4609 }
4610 
4611 #include <../src/mat/utils/freespace.h>
4612 #include <petscbt.h>
4613 
4614 #undef __FUNCT__
4615 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4616 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4617 {
4618   PetscErrorCode       ierr;
4619   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4620   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4621   PetscMPIInt          size,rank,taga,*len_s;
4622   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4623   PetscInt             proc,m;
4624   PetscInt             **buf_ri,**buf_rj;
4625   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4626   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4627   MPI_Request          *s_waits,*r_waits;
4628   MPI_Status           *status;
4629   MatScalar            *aa=a->a;
4630   MatScalar            **abuf_r,*ba_i;
4631   Mat_Merge_SeqsToMPI  *merge;
4632   PetscContainer       container;
4633 
4634   PetscFunctionBegin;
4635   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4636 
4637   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4638   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4639 
4640   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4641   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4642 
4643   bi     = merge->bi;
4644   bj     = merge->bj;
4645   buf_ri = merge->buf_ri;
4646   buf_rj = merge->buf_rj;
4647 
4648   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4649   owners = merge->rowmap->range;
4650   len_s  = merge->len_s;
4651 
4652   /* send and recv matrix values */
4653   /*-----------------------------*/
4654   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4655   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4656 
4657   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4658   for (proc=0,k=0; proc<size; proc++){
4659     if (!len_s[proc]) continue;
4660     i = owners[proc];
4661     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4662     k++;
4663   }
4664 
4665   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4666   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4667   ierr = PetscFree(status);CHKERRQ(ierr);
4668 
4669   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4670   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4671 
4672   /* insert mat values of mpimat */
4673   /*----------------------------*/
4674   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4675   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4676 
4677   for (k=0; k<merge->nrecv; k++){
4678     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4679     nrows = *(buf_ri_k[k]);
4680     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4681     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
4682   }
4683 
4684   /* set values of ba */
4685   m = merge->rowmap->n;
4686   for (i=0; i<m; i++) {
4687     arow = owners[rank] + i;
4688     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4689     bnzi = bi[i+1] - bi[i];
4690     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4691 
4692     /* add local non-zero vals of this proc's seqmat into ba */
4693     anzi = ai[arow+1] - ai[arow];
4694     aj   = a->j + ai[arow];
4695     aa   = a->a + ai[arow];
4696     nextaj = 0;
4697     for (j=0; nextaj<anzi; j++){
4698       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4699         ba_i[j] += aa[nextaj++];
4700       }
4701     }
4702 
4703     /* add received vals into ba */
4704     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4705       /* i-th row */
4706       if (i == *nextrow[k]) {
4707         anzi = *(nextai[k]+1) - *nextai[k];
4708         aj   = buf_rj[k] + *(nextai[k]);
4709         aa   = abuf_r[k] + *(nextai[k]);
4710         nextaj = 0;
4711         for (j=0; nextaj<anzi; j++){
4712           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4713             ba_i[j] += aa[nextaj++];
4714           }
4715         }
4716         nextrow[k]++; nextai[k]++;
4717       }
4718     }
4719     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4720   }
4721   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4722   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4723 
4724   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4725   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4726   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4727   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4728   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4729   PetscFunctionReturn(0);
4730 }
4731 
4732 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4733 
4734 #undef __FUNCT__
4735 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4736 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4737 {
4738   PetscErrorCode       ierr;
4739   Mat                  B_mpi;
4740   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4741   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4742   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4743   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4744   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4745   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4746   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4747   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4748   MPI_Status           *status;
4749   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4750   PetscBT              lnkbt;
4751   Mat_Merge_SeqsToMPI  *merge;
4752   PetscContainer       container;
4753 
4754   PetscFunctionBegin;
4755   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4756 
4757   /* make sure it is a PETSc comm */
4758   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4759   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4760   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4761 
4762   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4763   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4764 
4765   /* determine row ownership */
4766   /*---------------------------------------------------------*/
4767   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4768   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4769   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4770   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4771   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4772   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4773   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4774 
4775   m      = merge->rowmap->n;
4776   M      = merge->rowmap->N;
4777   owners = merge->rowmap->range;
4778 
4779   /* determine the number of messages to send, their lengths */
4780   /*---------------------------------------------------------*/
4781   len_s  = merge->len_s;
4782 
4783   len = 0;  /* length of buf_si[] */
4784   merge->nsend = 0;
4785   for (proc=0; proc<size; proc++){
4786     len_si[proc] = 0;
4787     if (proc == rank){
4788       len_s[proc] = 0;
4789     } else {
4790       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4791       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4792     }
4793     if (len_s[proc]) {
4794       merge->nsend++;
4795       nrows = 0;
4796       for (i=owners[proc]; i<owners[proc+1]; i++){
4797         if (ai[i+1] > ai[i]) nrows++;
4798       }
4799       len_si[proc] = 2*(nrows+1);
4800       len += len_si[proc];
4801     }
4802   }
4803 
4804   /* determine the number and length of messages to receive for ij-structure */
4805   /*-------------------------------------------------------------------------*/
4806   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4807   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4808 
4809   /* post the Irecv of j-structure */
4810   /*-------------------------------*/
4811   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4812   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4813 
4814   /* post the Isend of j-structure */
4815   /*--------------------------------*/
4816   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4817 
4818   for (proc=0, k=0; proc<size; proc++){
4819     if (!len_s[proc]) continue;
4820     i = owners[proc];
4821     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4822     k++;
4823   }
4824 
4825   /* receives and sends of j-structure are complete */
4826   /*------------------------------------------------*/
4827   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4828   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4829 
4830   /* send and recv i-structure */
4831   /*---------------------------*/
4832   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4833   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4834 
4835   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4836   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4837   for (proc=0,k=0; proc<size; proc++){
4838     if (!len_s[proc]) continue;
4839     /* form outgoing message for i-structure:
4840          buf_si[0]:                 nrows to be sent
4841                [1:nrows]:           row index (global)
4842                [nrows+1:2*nrows+1]: i-structure index
4843     */
4844     /*-------------------------------------------*/
4845     nrows = len_si[proc]/2 - 1;
4846     buf_si_i    = buf_si + nrows+1;
4847     buf_si[0]   = nrows;
4848     buf_si_i[0] = 0;
4849     nrows = 0;
4850     for (i=owners[proc]; i<owners[proc+1]; i++){
4851       anzi = ai[i+1] - ai[i];
4852       if (anzi) {
4853         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4854         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4855         nrows++;
4856       }
4857     }
4858     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4859     k++;
4860     buf_si += len_si[proc];
4861   }
4862 
4863   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4864   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4865 
4866   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4867   for (i=0; i<merge->nrecv; i++){
4868     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4869   }
4870 
4871   ierr = PetscFree(len_si);CHKERRQ(ierr);
4872   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4873   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4874   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4875   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4876   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4877   ierr = PetscFree(status);CHKERRQ(ierr);
4878 
4879   /* compute a local seq matrix in each processor */
4880   /*----------------------------------------------*/
4881   /* allocate bi array and free space for accumulating nonzero column info */
4882   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4883   bi[0] = 0;
4884 
4885   /* create and initialize a linked list */
4886   nlnk = N+1;
4887   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4888 
4889   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4890   len = 0;
4891   len  = ai[owners[rank+1]] - ai[owners[rank]];
4892   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4893   current_space = free_space;
4894 
4895   /* determine symbolic info for each local row */
4896   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4897 
4898   for (k=0; k<merge->nrecv; k++){
4899     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4900     nrows = *buf_ri_k[k];
4901     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4902     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
4903   }
4904 
4905   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4906   len = 0;
4907   for (i=0;i<m;i++) {
4908     bnzi   = 0;
4909     /* add local non-zero cols of this proc's seqmat into lnk */
4910     arow   = owners[rank] + i;
4911     anzi   = ai[arow+1] - ai[arow];
4912     aj     = a->j + ai[arow];
4913     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4914     bnzi += nlnk;
4915     /* add received col data into lnk */
4916     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4917       if (i == *nextrow[k]) { /* i-th row */
4918         anzi = *(nextai[k]+1) - *nextai[k];
4919         aj   = buf_rj[k] + *nextai[k];
4920         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4921         bnzi += nlnk;
4922         nextrow[k]++; nextai[k]++;
4923       }
4924     }
4925     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4926 
4927     /* if free space is not available, make more free space */
4928     if (current_space->local_remaining<bnzi) {
4929       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4930       nspacedouble++;
4931     }
4932     /* copy data into free space, then initialize lnk */
4933     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4934     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4935 
4936     current_space->array           += bnzi;
4937     current_space->local_used      += bnzi;
4938     current_space->local_remaining -= bnzi;
4939 
4940     bi[i+1] = bi[i] + bnzi;
4941   }
4942 
4943   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4944 
4945   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4946   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4947   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4948 
4949   /* create symbolic parallel matrix B_mpi */
4950   /*---------------------------------------*/
4951   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4952   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4953   if (n==PETSC_DECIDE) {
4954     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4955   } else {
4956     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4957   }
4958   ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr);
4959   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4960   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4961   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4962   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4963 
4964   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4965   B_mpi->assembled     = PETSC_FALSE;
4966   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4967   merge->bi            = bi;
4968   merge->bj            = bj;
4969   merge->buf_ri        = buf_ri;
4970   merge->buf_rj        = buf_rj;
4971   merge->coi           = PETSC_NULL;
4972   merge->coj           = PETSC_NULL;
4973   merge->owners_co     = PETSC_NULL;
4974 
4975   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4976 
4977   /* attach the supporting struct to B_mpi for reuse */
4978   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4979   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4980   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4981   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4982   *mpimat = B_mpi;
4983 
4984   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4985   PetscFunctionReturn(0);
4986 }
4987 
4988 #undef __FUNCT__
4989 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4990 /*@C
4991       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
4992                  matrices from each processor
4993 
4994     Collective on MPI_Comm
4995 
4996    Input Parameters:
4997 +    comm - the communicator the parallel matrix will live on
4998 .    seqmat - the input sequential matrices
4999 .    m - number of local rows (or PETSC_DECIDE)
5000 .    n - number of local columns (or PETSC_DECIDE)
5001 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5002 
5003    Output Parameter:
5004 .    mpimat - the parallel matrix generated
5005 
5006     Level: advanced
5007 
5008    Notes:
5009      The dimensions of the sequential matrix in each processor MUST be the same.
5010      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
5011      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
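
     As an illustrative sketch (not from a PETSc example), where localsum is each process's
     full-size SEQAIJ contribution; the second call reuses the parallel matrix after only the
     values (not the nonzero pattern) of localsum have changed:
.vb
     Mat globalsum;
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,localsum,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&globalsum);CHKERRQ(ierr);
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,localsum,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&globalsum);CHKERRQ(ierr);
.ve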
5012 @*/
5013 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5014 {
5015   PetscErrorCode   ierr;
5016   PetscMPIInt     size;
5017 
5018   PetscFunctionBegin;
5019   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5020   if (size == 1){
5021      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5022      if (scall == MAT_INITIAL_MATRIX){
5023        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5024      } else {
5025        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5026      }
5027      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5028      PetscFunctionReturn(0);
5029   }
5030   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5031   if (scall == MAT_INITIAL_MATRIX){
5032     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5033   }
5034   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5035   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5036   PetscFunctionReturn(0);
5037 }
5038 
5039 #undef __FUNCT__
5040 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5041 /*@
5042      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5043           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5044           with MatGetSize()
5045 
5046     Not Collective
5047 
5048    Input Parameters:
5049 +    A - the matrix
5050 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5051 
5052    Output Parameter:
5053 .    A_loc - the local sequential matrix generated
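
   An illustrative sketch (not from a PETSc example); with MAT_REUSE_MATRIX the same A_loc is
   refilled after the values (but not the nonzero pattern) of A have changed:
.vb
     Mat Aloc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     /* ... use Aloc, then update the values of A ... */
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
.ve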
5054 
5055     Level: developer
5056 
5057 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5058 
5059 @*/
5060 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5061 {
5062   PetscErrorCode  ierr;
5063   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
5064   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5065   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5066   MatScalar       *aa=a->a,*ba=b->a,*cam;
5067   PetscScalar     *ca;
5068   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5069   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
5070   PetscBool       match;
5071 
5072   PetscFunctionBegin;
5073   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5074   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5075   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5076   if (scall == MAT_INITIAL_MATRIX){
5077     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5078     ci[0] = 0;
5079     for (i=0; i<am; i++){
5080       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5081     }
5082     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5083     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5084     k = 0;
5085     for (i=0; i<am; i++) {
5086       ncols_o = bi[i+1] - bi[i];
5087       ncols_d = ai[i+1] - ai[i];
5088       /* off-diagonal portion of A */
5089       for (jo=0; jo<ncols_o; jo++) {
5090         col = cmap[*bj];
5091         if (col >= cstart) break;
5092         cj[k]   = col; bj++;
5093         ca[k++] = *ba++;
5094       }
5095       /* diagonal portion of A */
5096       for (j=0; j<ncols_d; j++) {
5097         cj[k]   = cstart + *aj++;
5098         ca[k++] = *aa++;
5099       }
5100       /* off-diagonal portion of A */
5101       for (j=jo; j<ncols_o; j++) {
5102         cj[k]   = cmap[*bj++];
5103         ca[k++] = *ba++;
5104       }
5105     }
5106     /* put together the new matrix */
5107     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5108     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5109     /* Since these are PETSc arrays, change flags to free them as necessary. */
5110     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5111     mat->free_a  = PETSC_TRUE;
5112     mat->free_ij = PETSC_TRUE;
5113     mat->nonew   = 0;
5114   } else if (scall == MAT_REUSE_MATRIX){
5115     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5116     ci = mat->i; cj = mat->j; cam = mat->a;
5117     for (i=0; i<am; i++) {
5118       /* off-diagonal portion of A */
5119       ncols_o = bi[i+1] - bi[i];
5120       for (jo=0; jo<ncols_o; jo++) {
5121         col = cmap[*bj];
5122         if (col >= cstart) break;
5123         *cam++ = *ba++; bj++;
5124       }
5125       /* diagonal portion of A */
5126       ncols_d = ai[i+1] - ai[i];
5127       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5128       /* off-diagonal portion of A */
5129       for (j=jo; j<ncols_o; j++) {
5130         *cam++ = *ba++; bj++;
5131       }
5132     }
5133   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5134   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5135   PetscFunctionReturn(0);
5136 }
5137 
5138 #undef __FUNCT__
5139 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5140 /*@C
5141      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5142 
5143     Not Collective
5144 
5145    Input Parameters:
5146 +    A - the matrix
5147 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5148 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5149 
5150    Output Parameter:
5151 .    A_loc - the local sequential matrix generated
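
   An illustrative sketch (not from a PETSc example), taking all local rows and all nonzero
   columns by passing PETSC_NULL for the index sets:
.vb
     Mat Aloc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
.ve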
5152 
5153     Level: developer
5154 
5155 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5156 
5157 @*/
5158 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5159 {
5160   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5161   PetscErrorCode    ierr;
5162   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5163   IS                isrowa,iscola;
5164   Mat               *aloc;
5165   PetscBool       match;
5166 
5167   PetscFunctionBegin;
5168   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5169   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5170   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5171   if (!row){
5172     start = A->rmap->rstart; end = A->rmap->rend;
5173     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5174   } else {
5175     isrowa = *row;
5176   }
5177   if (!col){
5178     start = A->cmap->rstart;
5179     cmap  = a->garray;
5180     nzA   = a->A->cmap->n;
5181     nzB   = a->B->cmap->n;
5182     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5183     ncols = 0;
5184     for (i=0; i<nzB; i++) {
5185       if (cmap[i] < start) idx[ncols++] = cmap[i];
5186       else break;
5187     }
5188     imark = i;
5189     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5190     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5191     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5192   } else {
5193     iscola = *col;
5194   }
5195   if (scall != MAT_INITIAL_MATRIX){
5196     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5197     aloc[0] = *A_loc;
5198   }
5199   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5200   *A_loc = aloc[0];
5201   ierr = PetscFree(aloc);CHKERRQ(ierr);
5202   if (!row){
5203     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5204   }
5205   if (!col){
5206     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5207   }
5208   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5209   PetscFunctionReturn(0);
5210 }
5211 
5212 #undef __FUNCT__
5213 #define __FUNCT__ "MatGetBrowsOfAcols"
5214 /*@C
5215     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5216 
5217     Collective on Mat
5218 
5219    Input Parameters:
5220 +    A,B - the matrices in mpiaij format
5221 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5222 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5223 
5224    Output Parameter:
5225 +    rowb, colb - index sets of rows and columns of B to extract
5226 -    B_seq - the sequential matrix generated
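
   An illustrative sketch (not from a PETSc example); on the first call the index sets are
   created and returned so they can be reused with MAT_REUSE_MATRIX later:
.vb
     IS  rowb = PETSC_NULL,colb = PETSC_NULL;
     Mat Bseq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
.ve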
5227 
5228     Level: developer
5229 
5230 @*/
5231 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5232 {
5233   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5234   PetscErrorCode    ierr;
5235   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5236   IS                isrowb,iscolb;
5237   Mat               *bseq=PETSC_NULL;
5238 
5239   PetscFunctionBegin;
5240   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5241     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5242   }
5243   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5244 
5245   if (scall == MAT_INITIAL_MATRIX){
5246     start = A->cmap->rstart;
5247     cmap  = a->garray;
5248     nzA   = a->A->cmap->n;
5249     nzB   = a->B->cmap->n;
5250     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5251     ncols = 0;
5252     for (i=0; i<nzB; i++) {  /* rows of B (global columns of A) below the locally owned range */
5253       if (cmap[i] < start) idx[ncols++] = cmap[i];
5254       else break;
5255     }
5256     imark = i;
5257     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* locally owned rows of B (the diagonal columns of A) */
5258     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* rows of B (global columns of A) above the locally owned range */
5259     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5260     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5261   } else {
5262     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5263     isrowb = *rowb; iscolb = *colb;
5264     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5265     bseq[0] = *B_seq;
5266   }
5267   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5268   *B_seq = bseq[0];
5269   ierr = PetscFree(bseq);CHKERRQ(ierr);
5270   if (!rowb){
5271     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5272   } else {
5273     *rowb = isrowb;
5274   }
5275   if (!colb){
5276     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5277   } else {
5278     *colb = iscolb;
5279   }
5280   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5281   PetscFunctionReturn(0);
5282 }
5283 
5284 #undef __FUNCT__
5285 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5286 /*
5287     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5288     of the OFF-DIAGONAL portion of the local A
5289 
5290     Collective on Mat
5291 
5292    Input Parameters:
5293 +    A,B - the matrices in mpiaij format
5294 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5295 
5296    Output Parameter:
5297 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5298 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5299 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5300 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5301 
5302     Level: developer
5303 
5304 */
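/*
   A minimal usage sketch (hypothetical variable names). The communication buffers
   created on the first call are returned to the caller, who passes them back for
   MAT_REUSE_MATRIX and frees them when done:

     PetscInt  *startsj_s = PETSC_NULL,*startsj_r = PETSC_NULL;
     MatScalar *bufa = PETSC_NULL;
     Mat       Both;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&Both);CHKERRQ(ierr);
     ... after the numerical values of B change ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&Both);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&Both);CHKERRQ(ierr);
*/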
5305 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5306 {
5307   VecScatter_MPI_General *gen_to,*gen_from;
5308   PetscErrorCode         ierr;
5309   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5310   Mat_SeqAIJ             *b_oth;
5311   VecScatter             ctx=a->Mvctx;
5312   MPI_Comm               comm=((PetscObject)ctx)->comm;
5313   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5314   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5315   PetscScalar            *rvalues,*svalues;
5316   MatScalar              *b_otha,*bufa,*bufA;
5317   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5318   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5319   MPI_Status             *sstatus,rstatus;
5320   PetscMPIInt            jj;
5321   PetscInt               *cols,sbs,rbs;
5322   PetscScalar            *vals;
5323 
5324   PetscFunctionBegin;
5325   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5326     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5327   }
5328   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5329   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5330 
5331   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5332   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5333   rvalues  = gen_from->values; /* reused here to hold the lengths of the incoming rows */
5334   svalues  = gen_to->values;   /* reused here to hold the lengths of the outgoing rows */
5335   nrecvs   = gen_from->n;
5336   nsends   = gen_to->n;
5337 
5338   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5339   srow     = gen_to->indices;   /* local row index to be sent */
5340   sstarts  = gen_to->starts;
5341   sprocs   = gen_to->procs;
5342   sstatus  = gen_to->sstatus;
5343   sbs      = gen_to->bs;
5344   rstarts  = gen_from->starts;
5345   rprocs   = gen_from->procs;
5346   rbs      = gen_from->bs;
5347 
5348   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; /* without the saved buffers reuse is impossible */
5349   if (scall == MAT_INITIAL_MATRIX){
5350     /* i-array */
5351     /*---------*/
5352     /*  post receives */
5353     for (i=0; i<nrecvs; i++){
5354       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5355       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5356       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5357     }
5358 
5359     /* pack the outgoing message */
5360     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5361     sstartsj[0] = 0;  rstartsj[0] = 0;
5362     len = 0; /* total length of j or a array to be sent */
5363     k = 0;
5364     for (i=0; i<nsends; i++){
5365       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5366       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5367       for (j=0; j<nrows; j++) {
5368         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5369         for (l=0; l<sbs; l++){
5370           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5371           rowlen[j*sbs+l] = ncols;
5372           len += ncols;
5373           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5374         }
5375         k++;
5376       }
5377       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5378       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5379     }
5380     /* recvs and sends of i-array are completed */
5381     i = nrecvs;
5382     while (i--) {
5383       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5384     }
5385     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5386 
5387     /* allocate buffers for sending j and a arrays */
5388     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5389     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5390 
5391     /* create i-array of B_oth */
5392     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5393     b_othi[0] = 0;
5394     len = 0; /* total length of j or a array to be received */
5395     k = 0;
5396     for (i=0; i<nrecvs; i++){
5397       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5398       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5399       for (j=0; j<nrows; j++) {
5400         b_othi[k+1] = b_othi[k] + rowlen[j];
5401         len += rowlen[j]; k++;
5402       }
5403       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5404     }
5405 
5406     /* allocate space for j and a arrays of B_oth */
5407     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5408     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5409 
5410     /* j-array */
5411     /*---------*/
5412     /*  post receives of j-array */
5413     for (i=0; i<nrecvs; i++){
5414       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5415       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5416     }
5417 
5418     /* pack the outgoing message j-array */
5419     k = 0;
5420     for (i=0; i<nsends; i++){
5421       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5422       bufJ = bufj+sstartsj[i];
5423       for (j=0; j<nrows; j++) {
5424         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5425         for (ll=0; ll<sbs; ll++){
5426           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5427           for (l=0; l<ncols; l++){
5428             *bufJ++ = cols[l];
5429           }
5430           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5431         }
5432       }
5433       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5434     }
5435 
5436     /* recvs and sends of j-array are completed */
5437     i = nrecvs;
5438     while (i--) {
5439       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5440     }
5441     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5442   } else if (scall == MAT_REUSE_MATRIX){
5443     sstartsj = *startsj_s;
5444     rstartsj = *startsj_r;
5445     bufa     = *bufa_ptr;
5446     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5447     b_otha   = b_oth->a;
5448   } else {
5449     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse argument: must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5450   }
5451 
5452   /* a-array */
5453   /*---------*/
5454   /*  post receives of a-array */
5455   for (i=0; i<nrecvs; i++){
5456     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5457     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5458   }
5459 
5460   /* pack the outgoing message a-array */
5461   k = 0;
5462   for (i=0; i<nsends; i++){
5463     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5464     bufA = bufa+sstartsj[i];
5465     for (j=0; j<nrows; j++) {
5466       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5467       for (ll=0; ll<sbs; ll++){
5468         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5469         for (l=0; l<ncols; l++){
5470           *bufA++ = vals[l];
5471         }
5472         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5473       }
5474     }
5475     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5476   }
5477   /* recvs and sends of a-array are completed */
5478   i = nrecvs;
5479   while (i--) {
5480     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5481   }
5482   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5483   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5484 
5485   if (scall == MAT_INITIAL_MATRIX){
5486     /* put together the new matrix */
5487     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5488 
5489     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5490     /* Since these are PETSc arrays, change flags to free them as necessary. */
5491     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5492     b_oth->free_a  = PETSC_TRUE;
5493     b_oth->free_ij = PETSC_TRUE;
5494     b_oth->nonew   = 0;
5495 
5496     ierr = PetscFree(bufj);CHKERRQ(ierr);
5497     if (!startsj_s || !bufa_ptr){
5498       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5499       ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the send buffer itself, not the (possibly NULL) caller pointer */
5500     } else {
5501       *startsj_s = sstartsj;
5502       *startsj_r = rstartsj;
5503       *bufa_ptr  = bufa;
5504     }
5505   }
5506   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5507   PetscFunctionReturn(0);
5508 }
5509 
5510 #undef __FUNCT__
5511 #define __FUNCT__ "MatGetCommunicationStructs"
5512 /*@C
5513   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5514 
5515   Not Collective
5516 
5517   Input Parameters:
5518 . A - The matrix in mpiaij format
5519 
5520   Output Parameter:
5521 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5522 . colmap - A map from global column index to local index into lvec
5523 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5524 
5525   Level: developer
5526 
5527 @*/
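/*
   A minimal usage sketch (hypothetical); the returned objects are owned by the
   matrix and must not be destroyed by the caller:

     Vec        lvec;
     VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/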
5528 #if defined (PETSC_USE_CTABLE)
5529 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5530 #else
5531 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5532 #endif
5533 {
5534   Mat_MPIAIJ *a;
5535 
5536   PetscFunctionBegin;
5537   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5538   PetscValidPointer(lvec, 2);
5539   PetscValidPointer(colmap, 3);
5540   PetscValidPointer(multScatter, 4);
5541   a = (Mat_MPIAIJ *) A->data;
5542   if (lvec) *lvec = a->lvec;
5543   if (colmap) *colmap = a->colmap;
5544   if (multScatter) *multScatter = a->Mvctx;
5545   PetscFunctionReturn(0);
5546 }
5547 
5548 EXTERN_C_BEGIN
5549 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5550 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5551 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5552 EXTERN_C_END
5553 
5554 #undef __FUNCT__
5555 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5556 /*
5557     Computes C = A*B as (B'*A')' since computing A*B directly with these parallel formats is untenable
5558 
5559                n                       p                          p
5560         (              )       (              )         (                  )
5561       m (      A       )  *  n (       B      )   =   m (         C        )
5562         (              )       (              )         (                  )
5563 
5564 */
5565 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5566 {
5567   PetscErrorCode     ierr;
5568   Mat                At,Bt,Ct;
5569 
5570   PetscFunctionBegin;
5571   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5572   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5573   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5574   ierr = MatDestroy(&At);CHKERRQ(ierr);
5575   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5576   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5577   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5578   PetscFunctionReturn(0);
5579 }
5580 
5581 #undef __FUNCT__
5582 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5583 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5584 {
5585   PetscErrorCode ierr;
5586   PetscInt       m=A->rmap->n,n=B->cmap->n;
5587   Mat            Cmat;
5588 
5589   PetscFunctionBegin;
5590   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5591   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5592   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5593   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5594   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5595   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5596   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5597   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5598   *C   = Cmat;
5599   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5600   PetscFunctionReturn(0);
5601 }
5602 
5603 /* ----------------------------------------------------------------*/
5604 #undef __FUNCT__
5605 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5606 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5607 {
5608   PetscErrorCode ierr;
5609 
5610   PetscFunctionBegin;
5611   if (scall == MAT_INITIAL_MATRIX){
5612     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5613   }
5614   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5615   PetscFunctionReturn(0);
5616 }
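/*
   These kernels are normally reached through the public interface rather than called
   directly; a minimal sketch (hypothetical), with A of type MATMPIDENSE and B of
   type MATMPIAIJ:

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/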
5617 
5618 EXTERN_C_BEGIN
5619 #if defined(PETSC_HAVE_MUMPS)
5620 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5621 #endif
5622 #if defined(PETSC_HAVE_PASTIX)
5623 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5624 #endif
5625 #if defined(PETSC_HAVE_SUPERLU_DIST)
5626 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5627 #endif
5628 #if defined(PETSC_HAVE_CLIQUE)
5629 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5630 #endif
5631 EXTERN_C_END
5632 
5633 /*MC
5634    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5635 
5636    Options Database Keys:
5637 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5638 
5639   Level: beginner
5640 
5641 .seealso: MatCreateAIJ()
5642 M*/
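/*
   A minimal creation sketch for this type (hypothetical sizes and preallocation
   counts):

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/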
5643 
5644 EXTERN_C_BEGIN
5645 #undef __FUNCT__
5646 #define __FUNCT__ "MatCreate_MPIAIJ"
5647 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5648 {
5649   Mat_MPIAIJ     *b;
5650   PetscErrorCode ierr;
5651   PetscMPIInt    size;
5652 
5653   PetscFunctionBegin;
5654   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5655   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5656   B->data         = (void*)b;
5657   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5658   B->assembled    = PETSC_FALSE;
5659   B->insertmode   = NOT_SET_VALUES;
5660   b->size         = size;
5661   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5662 
5663   /* build cache for off array entries formed */
5664   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5665   b->donotstash  = PETSC_FALSE;
5666   b->colmap      = 0;
5667   b->garray      = 0;
5668   b->roworiented = PETSC_TRUE;
5669 
5670   /* stuff used for matrix vector multiply */
5671   b->lvec      = PETSC_NULL;
5672   b->Mvctx     = PETSC_NULL;
5673 
5674   /* stuff for MatGetRow() */
5675   b->rowindices   = 0;
5676   b->rowvalues    = 0;
5677   b->getrowactive = PETSC_FALSE;
5678 
5679   /* flexible pointer used in CUSP/CUSPARSE classes */
5680   b->spptr        = PETSC_NULL;
5681 
5682 #if defined(PETSC_HAVE_MUMPS)
5683   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5684                                      "MatGetFactor_aij_mumps",
5685                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5686 #endif
5687 #if defined(PETSC_HAVE_PASTIX)
5688   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5689                                      "MatGetFactor_mpiaij_pastix",
5690                                      MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5691 #endif
5692 #if defined(PETSC_HAVE_SUPERLU_DIST)
5693   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5694                                      "MatGetFactor_mpiaij_superlu_dist",
5695                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5696 #endif
5697 #if defined(PETSC_HAVE_CLIQUE)
5698   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C",
5699                                      "MatGetFactor_aij_clique",
5700                                      MatGetFactor_aij_clique);CHKERRQ(ierr);
5701 #endif
5702   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5703                                      "MatStoreValues_MPIAIJ",
5704                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5705   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5706                                      "MatRetrieveValues_MPIAIJ",
5707                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5708   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5709                                      "MatGetDiagonalBlock_MPIAIJ",
5710                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5711   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5712                                      "MatIsTranspose_MPIAIJ",
5713                                      MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5714   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5715                                      "MatMPIAIJSetPreallocation_MPIAIJ",
5716                                      MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5717   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5718                                      "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5719                                      MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5720   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5721                                      "MatDiagonalScaleLocal_MPIAIJ",
5722                                      MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5723   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5724                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5725                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5726   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5727                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5728                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5729   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5730                                      "MatConvert_MPIAIJ_MPISBAIJ",
5731                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5732   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5733                                      "MatMatMult_MPIDense_MPIAIJ",
5734                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5735   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5736                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5737                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5738   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5739                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5740                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5741   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5742   PetscFunctionReturn(0);
5743 }
5744 EXTERN_C_END
5745 
5746 #undef __FUNCT__
5747 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5748 /*@
5749      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5750          and "off-diagonal" part of the matrix in CSR format.
5751 
5752    Collective on MPI_Comm
5753 
5754    Input Parameters:
5755 +  comm - MPI communicator
5756 .  m - number of local rows (Cannot be PETSC_DECIDE)
5757 .  n - This value should be the same as the local size used in creating the
5758        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5759        it calculated if N is given). For square matrices n is almost always m.
5760 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5761 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5762 .   i - row indices for "diagonal" portion of matrix
5763 .   i - row indices for "diagonal" portion of matrix
5764 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
5765 .   a - matrix values
5766 .   oi - row indices for "off-diagonal" portion of matrix
5767 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
5768 -   oa - matrix values
5769    Output Parameter:
5770 .   mat - the matrix
5771 
5772    Level: advanced
5773 
5774    Notes:
5775        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5776        must free the arrays once the matrix has been destroyed and not before.
5777 
5778        The i and j indices are 0 based
5779 
5780        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5781 
5782        This sets local rows and cannot be used to set off-processor values.
5783 
5784        You cannot later use MatSetValues() to change values in this matrix.
5785 
5786 .keywords: matrix, aij, compressed row, sparse, parallel
5787 
5788 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5789           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5790 @*/
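/*
   A minimal sketch of a call (hypothetical data): two processes, each owning one row
   of the 2x2 matrix [2 -1; -1 2], where rank comes from MPI_Comm_rank(). The diagonal
   column indices j are local, the off-diagonal indices oj are global, and none of the
   arrays may be freed before the matrix is destroyed:

     PetscInt    i[]  = {0,1},j[]  = {0};
     PetscScalar a[]  = {2.0};
     PetscInt    oi[] = {0,1},oj[] = {rank ? 0 : 1};
     PetscScalar oa[] = {-1.0};
     Mat         A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/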
5791 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5792                                                PetscInt oi[],PetscInt oj[],PetscScalar oa[],Mat *mat)
5793 {
5794   PetscErrorCode ierr;
5795   Mat_MPIAIJ     *maij;
5796 
5797   PetscFunctionBegin;
5798   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5799   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5800   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5801   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5802   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5803   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5804   maij = (Mat_MPIAIJ*) (*mat)->data;
5805   maij->donotstash     = PETSC_TRUE;
5806   (*mat)->preallocated = PETSC_TRUE;
5807 
5808   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5809   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5810 
5811   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5812   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5813 
5814   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5815   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5816   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5817   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5818 
5819   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5820   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5821   PetscFunctionReturn(0);
5822 }
5823 
5824 /*
5825     Special version for direct calls from Fortran
5826 */
5827 #include <petsc-private/fortranimpl.h>
5828 
5829 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5830 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5831 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5832 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5833 #endif
5834 
5835 /* Change these macros so they can be used in a void function */
5836 #undef CHKERRQ
5837 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5838 #undef SETERRQ2
5839 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5840 #undef SETERRQ3
5841 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5842 #undef SETERRQ
5843 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5844 
5845 EXTERN_C_BEGIN
5846 #undef __FUNCT__
5847 #define __FUNCT__ "matsetvaluesmpiaij_"
5848 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5849 {
5850   Mat             mat = *mmat;
5851   PetscInt        m = *mm, n = *mn;
5852   InsertMode      addv = *maddv;
5853   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5854   PetscScalar     value;
5855   PetscErrorCode  ierr;
5856 
5857   MatCheckPreallocated(mat,1);
5858   if (mat->insertmode == NOT_SET_VALUES) {
5859     mat->insertmode = addv;
5860   }
5861 #if defined(PETSC_USE_DEBUG)
5862   else if (mat->insertmode != addv) {
5863     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5864   }
5865 #endif
5866   {
5867   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5868   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5869   PetscBool       roworiented = aij->roworiented;
5870 
5871   /* Some Variables required in the macro */
5872   Mat             A = aij->A;
5873   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5874   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5875   MatScalar       *aa = a->a;
5876   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5877   Mat             B = aij->B;
5878   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5879   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5880   MatScalar       *ba = b->a;
5881 
5882   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5883   PetscInt        nonew = a->nonew;
5884   MatScalar       *ap1,*ap2;
5885 
5886   PetscFunctionBegin;
5887   for (i=0; i<m; i++) {
5888     if (im[i] < 0) continue;
5889 #if defined(PETSC_USE_DEBUG)
5890     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5891 #endif
5892     if (im[i] >= rstart && im[i] < rend) {
5893       row      = im[i] - rstart;
5894       lastcol1 = -1;
5895       rp1      = aj + ai[row];
5896       ap1      = aa + ai[row];
5897       rmax1    = aimax[row];
5898       nrow1    = ailen[row];
5899       low1     = 0;
5900       high1    = nrow1;
5901       lastcol2 = -1;
5902       rp2      = bj + bi[row];
5903       ap2      = ba + bi[row];
5904       rmax2    = bimax[row];
5905       nrow2    = bilen[row];
5906       low2     = 0;
5907       high2    = nrow2;
5908 
5909       for (j=0; j<n; j++) {
5910         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5911         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5912         if (in[j] >= cstart && in[j] < cend){
5913           col = in[j] - cstart;
5914           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5915         } else if (in[j] < 0) continue;
5916 #if defined(PETSC_USE_DEBUG)
5917         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5918 #endif
5919         else {
5920           if (mat->was_assembled) {
5921             if (!aij->colmap) {
5922               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5923             }
5924 #if defined (PETSC_USE_CTABLE)
5925             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5926             col--;
5927 #else
5928             col = aij->colmap[in[j]] - 1;
5929 #endif
5930             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5931               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5932               col =  in[j];
5933               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5934               B = aij->B;
5935               b = (Mat_SeqAIJ*)B->data;
5936               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5937               rp2      = bj + bi[row];
5938               ap2      = ba + bi[row];
5939               rmax2    = bimax[row];
5940               nrow2    = bilen[row];
5941               low2     = 0;
5942               high2    = nrow2;
5943               bm       = aij->B->rmap->n;
5944               ba = b->a;
5945             }
5946           } else col = in[j];
5947           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5948         }
5949       }
5950     } else {
5951       if (!aij->donotstash) {
5952         if (roworiented) {
5953           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5954         } else {
5955           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5956         }
5957       }
5958     }
5959   }}
5960   PetscFunctionReturnVoid();
5961 }
5962 EXTERN_C_END
5963 
5964