xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision bcaeba4d41d6ca6c6dc4189db20683073a9959ce)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to using inodes when
18    enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
23 M*/
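/*
   A minimal usage sketch for MATAIJ (illustrative only: a square matrix with N global rows and
   rough per-row nonzero estimates of 5 in the diagonal block and 2 in the off-diagonal block).
   Calling both preallocation routines keeps the same code correct whether the communicator has
   one process (the SeqAIJ call takes effect) or many (the MPIAIJ call takes effect):

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ...  MatSetValues() / MatAssemblyBegin() / MatAssemblyEnd() ...
*/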
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
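/*
   A minimal sketch (illustrative only) of selecting this format at run time instead of in code:
   call MatSetFromOptions() after MatCreate()/MatSetSizes() and run with -mat_type aijcrl; the
   preallocation calls are then the same as for MATAIJ above:

     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/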
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
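  /* Two passes over the local rows: the first counts rows whose diagonal (A) and off-diagonal (B)
     parts contain no nonzero values; if no process owns such a row, *keptrows is left NULL,
     otherwise a second pass gathers the indices of the rows that are kept into an index set. */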
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for (j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
107 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
108 {
109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
110   PetscErrorCode ierr;
111   PetscInt       i,rstart,nrows,*rows;
112 
113   PetscFunctionBegin;
114   *zrows = PETSC_NULL;
115   ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
116   ierr = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
117   for (i=0; i<nrows; i++) rows[i] += rstart;
118   ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 #undef __FUNCT__
123 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
124 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
125 {
126   PetscErrorCode ierr;
127   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
128   PetscInt       i,n,*garray = aij->garray;
129   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
130   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
131   PetscReal      *work;
132 
133   PetscFunctionBegin;
134   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
135   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
136   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
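  /* work[] is indexed by global column: the diagonal block contributes at cstart + local column,
     the off-diagonal block maps its compressed column index through garray; an Allreduce (MAX for
     the infinity norm, SUM otherwise) then combines the per-process partial results into norms[] */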
137   if (type == NORM_2) {
138     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
139       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
140     }
141     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
142       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
143     }
144   } else if (type == NORM_1) {
145     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
146       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
147     }
148     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
149       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
150     }
151   } else if (type == NORM_INFINITY) {
152     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
153       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
154     }
155     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
156       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
157     }
158 
159   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
160   if (type == NORM_INFINITY) {
161     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
162   } else {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
164   }
165   ierr = PetscFree(work);CHKERRQ(ierr);
166   if (type == NORM_2) {
167     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "MatDistribute_MPIAIJ"
174 /*
175     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
176     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
177 
178     Only for square matrices
179 */
180 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
181 {
182   PetscMPIInt    rank,size;
183   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
184   PetscErrorCode ierr;
185   Mat            mat;
186   Mat_SeqAIJ     *gmata;
187   PetscMPIInt    tag;
188   MPI_Status     status;
189   PetscBool      aij;
190   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
191 
192   PetscFunctionBegin;
193   CHKMEMQ;
194   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
195   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
196   if (!rank) {
197     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
198     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
199   }
200   if (reuse == MAT_INITIAL_MATRIX) {
201     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
202     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
203     if (!rank) {
204       bses[0] = gmat->rmap->bs;
205       bses[1] = gmat->cmap->bs;
206     }
207     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
208     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
209     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
210     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
211     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
212     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) {
215       rowners[i] += rowners[i-1];
216     }
217     rstart = rowners[rank];
218     rend   = rowners[rank+1];
219     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
220     if (!rank) {
221       gmata = (Mat_SeqAIJ*) gmat->data;
222       /* send row lengths to all processors */
223       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
224       for (i=1; i<size; i++) {
225 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
226       }
227       /* determine the number of diagonal and off-diagonal entries in each row */
228       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
229       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
230       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
231       jj = 0;
232       for (i=0; i<m; i++) {
233 	for (j=0; j<dlens[i]; j++) {
234           if (gmata->j[jj] < rstart) ld[i]++;
235 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
236 	  jj++;
237 	}
238       }
239       /* send column indices to other processes */
240       for (i=1; i<size; i++) {
241 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
242 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
243 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
244       }
245 
246       /* send numerical values to other processes */
247       for (i=1; i<size; i++) {
248         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
249         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
250       }
251       gmataa = gmata->a;
252       gmataj = gmata->j;
253 
254     } else {
255       /* receive row lengths */
256       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
257       /* receive column indices */
258       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
260       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       /* determine the number of diagonal and off-diagonal entries in each row */
262       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
263       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
264       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
265       jj = 0;
266       for (i=0; i<m; i++) {
267 	for (j=0; j<dlens[i]; j++) {
268           if (gmataj[jj] < rstart) ld[i]++;
269 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
270 	  jj++;
271 	}
272       }
273       /* receive numerical values */
274       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
275       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
276     }
277     /* set preallocation */
278     for (i=0; i<m; i++) {
279       dlens[i] -= olens[i];
280     }
281     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
282     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
283 
284     for (i=0; i<m; i++) {
285       dlens[i] += olens[i];
286     }
287     cnt  = 0;
288     for (i=0; i<m; i++) {
289       row  = rstart + i;
290       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
291       cnt += dlens[i];
292     }
293     if (rank) {
294       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
295     }
296     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
297     ierr = PetscFree(rowners);CHKERRQ(ierr);
298     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
299     *inmat = mat;
300   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
301     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
302     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
303     mat   = *inmat;
304     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
305     if (!rank) {
306       /* send numerical values to other processes */
307       gmata = (Mat_SeqAIJ*) gmat->data;
308       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
309       gmataa = gmata->a;
310       for (i=1; i<size; i++) {
311         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
312         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
313       }
314       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
315     } else {
316       /* receive numerical values from process 0*/
317       nz   = Ad->nz + Ao->nz;
318       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
319       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
320     }
321     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
322     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
323     ad = Ad->a;
324     ao = Ao->a;
325     if (mat->rmap->n) {
326       i  = 0;
327       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
328       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
329     }
330     for (i=1; i<mat->rmap->n; i++) {
331       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     i--;
335     if (mat->rmap->n) {
336       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
337     }
338     if (rank) {
339       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
340     }
341   }
342   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
344   CHKMEMQ;
345   PetscFunctionReturn(0);
346 }
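/*
   A minimal calling sketch for MatDistribute_MPIAIJ() above (assumptions: gmat is a square
   MATSEQAIJ that is valid on rank 0, every rank passes the number of rows m it should own, and
   the nonzero pattern does not change between calls, so the reuse path only ships numerical
   values from rank 0):

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     ...  refill gmat on rank 0 with new numerical values, same pattern ...
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/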
347 
348 /*
349   Local utility routine that creates a mapping from the global column
350 number to the local number in the off-diagonal part of the local
351 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
352 a slightly higher hash table cost; without it, it is not scalable (each processor
353 has an order N integer array) but is fast to access.
354 */
355 #undef __FUNCT__
356 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
357 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
358 {
359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
360   PetscErrorCode ierr;
361   PetscInt       n = aij->B->cmap->n,i;
362 
363   PetscFunctionBegin;
364   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
365 #if defined (PETSC_USE_CTABLE)
366   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
367   for (i=0; i<n; i++){
368     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
369   }
370 #else
371   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
372   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
373   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
374   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
375 #endif
376   PetscFunctionReturn(0);
377 }
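/*
   For reference, a minimal sketch (assuming the colmap built above exists) of how a global column
   index gcol is translated into a local column index of the off-diagonal block B; this mirrors the
   lookups in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below, where a result of -1 means the
   column is not present in B:

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/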
378 
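/*
   The two macros below insert (row,col,value) into the local diagonal (A) respectively the
   off-diagonal (B) SeqAIJ block: a short bisection narrows the search range within the row, a
   linear scan locates the column, existing entries are added to or overwritten according to addv,
   and a genuinely new entry triggers MatSeqXAIJReallocateAIJ() (when permitted by nonew) followed
   by shifting the later entries of the row up by one slot.
*/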
379 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
380 { \
381     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
382     lastcol1 = col;\
383     while (high1-low1 > 5) { \
384       t = (low1+high1)/2; \
385       if (rp1[t] > col) high1 = t; \
386       else             low1  = t; \
387     } \
388       for (_i=low1; _i<high1; _i++) { \
389         if (rp1[_i] > col) break; \
390         if (rp1[_i] == col) { \
391           if (addv == ADD_VALUES) ap1[_i] += value;   \
392           else                    ap1[_i] = value; \
393           goto a_noinsert; \
394         } \
395       }  \
396       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
397       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
398       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
399       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
400       N = nrow1++ - 1; a->nz++; high1++; \
401       /* shift up all the later entries in this row */ \
402       for (ii=N; ii>=_i; ii--) { \
403         rp1[ii+1] = rp1[ii]; \
404         ap1[ii+1] = ap1[ii]; \
405       } \
406       rp1[_i] = col;  \
407       ap1[_i] = value;  \
408       a_noinsert: ; \
409       ailen[row] = nrow1; \
410 }
411 
412 
413 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
414 { \
415     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
416     lastcol2 = col;\
417     while (high2-low2 > 5) { \
418       t = (low2+high2)/2; \
419       if (rp2[t] > col) high2 = t; \
420       else             low2  = t; \
421     } \
422     for (_i=low2; _i<high2; _i++) {		\
423       if (rp2[_i] > col) break;			\
424       if (rp2[_i] == col) {			      \
425 	if (addv == ADD_VALUES) ap2[_i] += value;     \
426 	else                    ap2[_i] = value;      \
427 	goto b_noinsert;			      \
428       }						      \
429     }							      \
430     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
431     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
432     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
433     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
434     N = nrow2++ - 1; b->nz++; high2++;					\
435     /* shift up all the later entries in this row */			\
436     for (ii=N; ii>=_i; ii--) {						\
437       rp2[ii+1] = rp2[ii];						\
438       ap2[ii+1] = ap2[ii];						\
439     }									\
440     rp2[_i] = col;							\
441     ap2[_i] = value;							\
442     b_noinsert: ;								\
443     bilen[row] = nrow2;							\
444 }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
449 {
450   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
451   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
452   PetscErrorCode ierr;
453   PetscInt       l,*garray = mat->garray,diag;
454 
455   PetscFunctionBegin;
456   /* code only works for square matrices A */
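  /* v is assumed to hold the complete local row ordered by global column: the off-diagonal (B)
     entries left of the diagonal block, then the whole diagonal (A) part, then the remaining
     off-diagonal entries to the right of the diagonal block */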
457 
458   /* find size of row to the left of the diagonal part */
459   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
460   row  = row - diag;
461   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
462     if (garray[b->j[b->i[row]+l]] > diag) break;
463   }
464   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
465 
466   /* diagonal part */
467   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
468 
469   /* right of diagonal part */
470   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatSetValues_MPIAIJ"
476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
477 {
478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
479   PetscScalar    value;
480   PetscErrorCode ierr;
481   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
482   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
483   PetscBool      roworiented = aij->roworiented;
484 
485   /* Some Variables required in the macro */
486   Mat            A = aij->A;
487   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
488   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
489   MatScalar      *aa = a->a;
490   PetscBool      ignorezeroentries = a->ignorezeroentries;
491   Mat            B = aij->B;
492   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
493   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
494   MatScalar      *ba = b->a;
495 
496   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
497   PetscInt       nonew;
498   MatScalar      *ap1,*ap2;
499 
500   PetscFunctionBegin;
501   if (v) PetscValidScalarPointer(v,6);
502   for (i=0; i<m; i++) {
503     if (im[i] < 0) continue;
504 #if defined(PETSC_USE_DEBUG)
505     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
506 #endif
507     if (im[i] >= rstart && im[i] < rend) {
508       row      = im[i] - rstart;
509       lastcol1 = -1;
510       rp1      = aj + ai[row];
511       ap1      = aa + ai[row];
512       rmax1    = aimax[row];
513       nrow1    = ailen[row];
514       low1     = 0;
515       high1    = nrow1;
516       lastcol2 = -1;
517       rp2      = bj + bi[row];
518       ap2      = ba + bi[row];
519       rmax2    = bimax[row];
520       nrow2    = bilen[row];
521       low2     = 0;
522       high2    = nrow2;
523 
524       for (j=0; j<n; j++) {
525         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
526         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
527         if (in[j] >= cstart && in[j] < cend){
528           col = in[j] - cstart;
529           nonew = a->nonew;
530           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
531         } else if (in[j] < 0) continue;
532 #if defined(PETSC_USE_DEBUG)
533         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
534 #endif
535         else {
536           if (mat->was_assembled) {
537             if (!aij->colmap) {
538               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
539             }
540 #if defined (PETSC_USE_CTABLE)
541             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
542 	    col--;
543 #else
544             col = aij->colmap[in[j]] - 1;
545 #endif
546             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
547               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
548               col =  in[j];
549               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
550               B = aij->B;
551               b = (Mat_SeqAIJ*)B->data;
552               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
553               rp2      = bj + bi[row];
554               ap2      = ba + bi[row];
555               rmax2    = bimax[row];
556               nrow2    = bilen[row];
557               low2     = 0;
558               high2    = nrow2;
559               bm       = aij->B->rmap->n;
560               ba = b->a;
561             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
562           } else col = in[j];
563           nonew = b->nonew;
564           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
565         }
566       }
567     } else {
568       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
569       if (!aij->donotstash) {
570         mat->assembled = PETSC_FALSE;
571         if (roworiented) {
572           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
573         } else {
574           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
575         }
576       }
577     }
578   }
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatGetValues_MPIAIJ"
584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
585 {
586   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
587   PetscErrorCode ierr;
588   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
589   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
590 
591   PetscFunctionBegin;
592   for (i=0; i<m; i++) {
593     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
594     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
595     if (idxm[i] >= rstart && idxm[i] < rend) {
596       row = idxm[i] - rstart;
597       for (j=0; j<n; j++) {
598         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
599         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
600         if (idxn[j] >= cstart && idxn[j] < cend){
601           col = idxn[j] - cstart;
602           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
603         } else {
604           if (!aij->colmap) {
605             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
606           }
607 #if defined (PETSC_USE_CTABLE)
608           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
609           col --;
610 #else
611           col = aij->colmap[idxn[j]] - 1;
612 #endif
613           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
614           else {
615             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
616           }
617         }
618       }
619     } else {
620       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
621     }
622   }
623   PetscFunctionReturn(0);
624 }
625 
626 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
627 
628 #undef __FUNCT__
629 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
630 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
631 {
632   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
633   PetscErrorCode ierr;
634   PetscInt       nstash,reallocs;
635   InsertMode     addv;
636 
637   PetscFunctionBegin;
638   if (aij->donotstash || mat->nooffprocentries) {
639     PetscFunctionReturn(0);
640   }
641 
642   /* make sure all processors are either in INSERTMODE or ADDMODE */
643   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
644   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
645   mat->insertmode = addv; /* in case this processor had no cache */
646 
647   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
648   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
649   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
650   PetscFunctionReturn(0);
651 }
652 
653 #undef __FUNCT__
654 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
655 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
656 {
657   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
658   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
659   PetscErrorCode ierr;
660   PetscMPIInt    n;
661   PetscInt       i,j,rstart,ncols,flg;
662   PetscInt       *row,*col;
663   PetscBool      other_disassembled;
664   PetscScalar    *val;
665   InsertMode     addv = mat->insertmode;
666 
667   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
668   PetscFunctionBegin;
669   if (!aij->donotstash && !mat->nooffprocentries) {
670     while (1) {
671       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
672       if (!flg) break;
673 
674       for (i=0; i<n;) {
675         /* Now identify the consecutive vals belonging to the same row */
676         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
677         if (j < n) ncols = j-i;
678         else       ncols = n-i;
679         /* Now assemble all these values with a single function call */
680         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
681         i = j;
682       }
683     }
684     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
685   }
686   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
687   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
688 
689   /* determine if any processor has disassembled, if so we must
690      also disassemble ourselves, in order that we may reassemble. */
691   /*
692      if nonzero structure of submatrix B cannot change then we know that
693      no processor disassembled thus we can skip this stuff
694   */
695   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
696     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
697     if (mat->was_assembled && !other_disassembled) {
698       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
699     }
700   }
701   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
702     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
703   }
704   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
705   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
706   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
707   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
708 
709   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
710   aij->rowvalues = 0;
711 
712   /* used by MatAXPY() */
713   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
714   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
715 
716   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
717   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
718   PetscFunctionReturn(0);
719 }
720 
721 #undef __FUNCT__
722 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
723 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
724 {
725   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
726   PetscErrorCode ierr;
727 
728   PetscFunctionBegin;
729   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
730   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
731   PetscFunctionReturn(0);
732 }
733 
734 #undef __FUNCT__
735 #define __FUNCT__ "MatZeroRows_MPIAIJ"
736 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
737 {
738   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
739   PetscErrorCode    ierr;
740   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
741   PetscInt          i,*owners = A->rmap->range;
742   PetscInt          *nprocs,j,idx,nsends,row;
743   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
744   PetscInt          *rvalues,count,base,slen,*source;
745   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
746   MPI_Comm          comm = ((PetscObject)A)->comm;
747   MPI_Request       *send_waits,*recv_waits;
748   MPI_Status        recv_status,*send_status;
749   const PetscScalar *xx;
750   PetscScalar       *bb;
751 #if defined(PETSC_DEBUG)
752   PetscBool      found = PETSC_FALSE;
753 #endif
754 
755   PetscFunctionBegin;
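  /* Overview: each process determines, for every global row index it was handed, which process
     owns that row, ships the indices to their owners with nonblocking point-to-point messages, and
     collects the indices it owns itself into lrows; the actual zeroing is then done locally on the
     diagonal (A) and off-diagonal (B) blocks. */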
756   /*  first count number of contributors to each processor */
757   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
758   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
759   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
760   j = 0;
761   for (i=0; i<N; i++) {
762     if (lastidx > (idx = rows[i])) j = 0;
763     lastidx = idx;
764     for (; j<size; j++) {
765       if (idx >= owners[j] && idx < owners[j+1]) {
766         nprocs[2*j]++;
767         nprocs[2*j+1] = 1;
768         owner[i] = j;
769 #if defined(PETSC_DEBUG)
770         found = PETSC_TRUE;
771 #endif
772         break;
773       }
774     }
775 #if defined(PETSC_DEBUG)
776     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
777     found = PETSC_FALSE;
778 #endif
779   }
780   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
781 
782   if (A->nooffproczerorows) {
783     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
784     nrecvs = nsends;
785     nmax   = N;
786   } else {
787     /* inform other processors of number of messages and max length*/
788     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
789   }
790 
791   /* post receives:   */
792   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
793   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
794   for (i=0; i<nrecvs; i++) {
795     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
796   }
797 
798   /* do sends:
799       1) starts[i] gives the starting index in svalues for stuff going to
800          the ith processor
801   */
802   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
803   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
804   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
805   starts[0] = 0;
806   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
807   for (i=0; i<N; i++) {
808     svalues[starts[owner[i]]++] = rows[i];
809   }
810 
811   starts[0] = 0;
812   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
813   count = 0;
814   for (i=0; i<size; i++) {
815     if (nprocs[2*i+1]) {
816       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
817     }
818   }
819   ierr = PetscFree(starts);CHKERRQ(ierr);
820 
821   base = owners[rank];
822 
823   /*  wait on receives */
824   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
825   count  = nrecvs; slen = 0;
826   while (count) {
827     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
828     /* unpack receives into our local space */
829     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
830     source[imdex]  = recv_status.MPI_SOURCE;
831     lens[imdex]    = n;
832     slen          += n;
833     count--;
834   }
835   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
836 
837   /* move the data into the send scatter */
838   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
839   count = 0;
840   for (i=0; i<nrecvs; i++) {
841     values = rvalues + i*nmax;
842     for (j=0; j<lens[i]; j++) {
843       lrows[count++] = values[j] - base;
844     }
845   }
846   ierr = PetscFree(rvalues);CHKERRQ(ierr);
847   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
848   ierr = PetscFree(owner);CHKERRQ(ierr);
849   ierr = PetscFree(nprocs);CHKERRQ(ierr);
850 
851   /* fix right hand side if needed */
852   if (x && b) {
853     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
854     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
855     for (i=0; i<slen; i++) {
856       bb[lrows[i]] = diag*xx[lrows[i]];
857     }
858     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
859     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
860   }
861   /*
862         Zero the required rows. If the "diagonal block" of the matrix
863      is square and the user wishes to set the diagonal we use separate
864      code so that MatSetValues() is not called for each diagonal allocating
865      new memory, thus calling lots of mallocs and slowing things down.
866 
867   */
868   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
869   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
870   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
871     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
872   } else if (diag != 0.0) {
873     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
874     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
875       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
876 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
877     }
878     for (i = 0; i < slen; i++) {
879       row  = lrows[i] + rstart;
880       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
881     }
882     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
883     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
884   } else {
885     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
886   }
887   ierr = PetscFree(lrows);CHKERRQ(ierr);
888 
889   /* wait on sends */
890   if (nsends) {
891     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
892     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
893     ierr = PetscFree(send_status);CHKERRQ(ierr);
894   }
895   ierr = PetscFree(send_waits);CHKERRQ(ierr);
896   ierr = PetscFree(svalues);CHKERRQ(ierr);
897   PetscFunctionReturn(0);
898 }
899 
900 #undef __FUNCT__
901 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
902 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
903 {
904   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
905   PetscErrorCode    ierr;
906   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
907   PetscInt          i,*owners = A->rmap->range;
908   PetscInt          *nprocs,j,idx,nsends;
909   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
910   PetscInt          *rvalues,count,base,slen,*source;
911   PetscInt          *lens,*lrows,*values,m;
912   MPI_Comm          comm = ((PetscObject)A)->comm;
913   MPI_Request       *send_waits,*recv_waits;
914   MPI_Status        recv_status,*send_status;
915   const PetscScalar *xx;
916   PetscScalar       *bb,*mask;
917   Vec               xmask,lmask;
918   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
919   const PetscInt    *aj, *ii,*ridx;
920   PetscScalar       *aa;
921 #if defined(PETSC_DEBUG)
922   PetscBool         found = PETSC_FALSE;
923 #endif
924 
925   PetscFunctionBegin;
926   /*  first count number of contributors to each processor */
927   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
928   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
929   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
930   j = 0;
931   for (i=0; i<N; i++) {
932     if (lastidx > (idx = rows[i])) j = 0;
933     lastidx = idx;
934     for (; j<size; j++) {
935       if (idx >= owners[j] && idx < owners[j+1]) {
936         nprocs[2*j]++;
937         nprocs[2*j+1] = 1;
938         owner[i] = j;
939 #if defined(PETSC_DEBUG)
940         found = PETSC_TRUE;
941 #endif
942         break;
943       }
944     }
945 #if defined(PETSC_DEBUG)
946     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
947     found = PETSC_FALSE;
948 #endif
949   }
950   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
951 
952   /* inform other processors of number of messages and max length*/
953   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
954 
955   /* post receives:   */
956   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
957   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
958   for (i=0; i<nrecvs; i++) {
959     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
960   }
961 
962   /* do sends:
963       1) starts[i] gives the starting index in svalues for stuff going to
964          the ith processor
965   */
966   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
967   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
968   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
969   starts[0] = 0;
970   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
971   for (i=0; i<N; i++) {
972     svalues[starts[owner[i]]++] = rows[i];
973   }
974 
975   starts[0] = 0;
976   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
977   count = 0;
978   for (i=0; i<size; i++) {
979     if (nprocs[2*i+1]) {
980       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
981     }
982   }
983   ierr = PetscFree(starts);CHKERRQ(ierr);
984 
985   base = owners[rank];
986 
987   /*  wait on receives */
988   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
989   count  = nrecvs; slen = 0;
990   while (count) {
991     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
992     /* unpack receives into our local space */
993     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
994     source[imdex]  = recv_status.MPI_SOURCE;
995     lens[imdex]    = n;
996     slen          += n;
997     count--;
998   }
999   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1000 
1001   /* move the data into the send scatter */
1002   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1003   count = 0;
1004   for (i=0; i<nrecvs; i++) {
1005     values = rvalues + i*nmax;
1006     for (j=0; j<lens[i]; j++) {
1007       lrows[count++] = values[j] - base;
1008     }
1009   }
1010   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1011   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1012   ierr = PetscFree(owner);CHKERRQ(ierr);
1013   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1014   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1015 
1016   /* zero diagonal part of matrix */
1017   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1018 
1019   /* handle off diagonal part of matrix */
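  /* Build an indicator vector: xmask holds a 1 in every global row/column being eliminated and the
     existing Mvctx scatter pulls those indicators into lmask, which is indexed exactly like the
     columns of B; any B entry whose column has a nonzero mask belongs to an eliminated column, so
     its contribution is (when x and b are provided) subtracted from the right hand side and the
     entry itself is set to zero. */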
1020   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
1021   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1022   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1023   for (i=0; i<slen; i++) {
1024     bb[lrows[i]] = 1;
1025   }
1026   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1027   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1028   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1030   if (x) {
1031     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1032     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1034     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1035   }
1036   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1037 
1038   /* remove zeroed rows of off diagonal matrix */
1039   ii = aij->i;
1040   for (i=0; i<slen; i++) {
1041     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1042   }
1043 
1044   /* loop over all elements of off process part of matrix zeroing removed columns*/
1045   if (aij->compressedrow.use){
1046     m    = aij->compressedrow.nrows;
1047     ii   = aij->compressedrow.i;
1048     ridx = aij->compressedrow.rindex;
1049     for (i=0; i<m; i++){
1050       n   = ii[i+1] - ii[i];
1051       aj  = aij->j + ii[i];
1052       aa  = aij->a + ii[i];
1053 
1054       for (j=0; j<n; j++) {
1055         if (PetscAbsScalar(mask[*aj])) {
1056           if (b) bb[*ridx] -= *aa*xx[*aj];
1057           *aa        = 0.0;
1058         }
1059         aa++;
1060         aj++;
1061       }
1062       ridx++;
1063     }
1064   } else { /* do not use compressed row format */
1065     m = l->B->rmap->n;
1066     for (i=0; i<m; i++) {
1067       n   = ii[i+1] - ii[i];
1068       aj  = aij->j + ii[i];
1069       aa  = aij->a + ii[i];
1070       for (j=0; j<n; j++) {
1071         if (PetscAbsScalar(mask[*aj])) {
1072           if (b) bb[i] -= *aa*xx[*aj];
1073           *aa    = 0.0;
1074         }
1075         aa++;
1076         aj++;
1077       }
1078     }
1079   }
1080   if (x) {
1081     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1082     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1083   }
1084   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1085   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1086   ierr = PetscFree(lrows);CHKERRQ(ierr);
1087 
1088   /* wait on sends */
1089   if (nsends) {
1090     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1091     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1092     ierr = PetscFree(send_status);CHKERRQ(ierr);
1093   }
1094   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1095   ierr = PetscFree(svalues);CHKERRQ(ierr);
1096 
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMult_MPIAIJ"
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1110   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151   PetscBool      merged;
1152 
1153   PetscFunctionBegin;
1154   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1155   /* do nondiagonal part */
1156   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1157   if (!merged) {
1158     /* send it on its way */
1159     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160     /* do local part */
1161     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1162     /* receive remote parts: note this assumes the values are not actually */
1163     /* added in yy until the next line, */
1164     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165   } else {
1166     /* do local part */
1167     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1168     /* send it on its way */
1169     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1170     /* values actually were received in the Begin() but we need to call this nop */
1171     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 EXTERN_C_BEGIN
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1179 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1180 {
1181   MPI_Comm       comm;
1182   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1183   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1184   IS             Me,Notme;
1185   PetscErrorCode ierr;
1186   PetscInt       M,N,first,last,*notme,i;
1187   PetscMPIInt    size;
1188 
1189   PetscFunctionBegin;
1190 
1191   /* Easy test: the diagonal blocks must be transposes of each other (symmetric when Amat == Bmat) */
1192   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1193   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1194   if (!*f) PetscFunctionReturn(0);
1195   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1196   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1197   if (size == 1) PetscFunctionReturn(0);
1198 
1199   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1200   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1201   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1202   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1203   for (i=0; i<first; i++) notme[i] = i;
1204   for (i=last; i<M; i++) notme[i-last+first] = i;
1205   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1206   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1207   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1208   Aoff = Aoffs[0];
1209   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1210   Boff = Boffs[0];
1211   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1212   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1213   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1214   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1215   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1216   ierr = PetscFree(notme);CHKERRQ(ierr);
1217   PetscFunctionReturn(0);
1218 }
1219 EXTERN_C_END
1220 
1221 #undef __FUNCT__
1222 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1223 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1224 {
1225   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1226   PetscErrorCode ierr;
1227 
1228   PetscFunctionBegin;
1229   /* do nondiagonal part */
1230   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1231   /* send it on its way */
1232   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1233   /* do local part */
1234   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1235   /* receive remote parts */
1236   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 /*
1241   This only works correctly for square matrices where the subblock A->A is the
1242    diagonal block
1243 */
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1246 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1247 {
1248   PetscErrorCode ierr;
1249   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1250 
1251   PetscFunctionBegin;
1252   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1253   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1254   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatScale_MPIAIJ"
1260 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1261 {
1262   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1263   PetscErrorCode ierr;
1264 
1265   PetscFunctionBegin;
1266   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1267   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #undef __FUNCT__
1272 #define __FUNCT__ "MatDestroy_MPIAIJ"
1273 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1274 {
1275   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1276   PetscErrorCode ierr;
1277 
1278   PetscFunctionBegin;
1279 #if defined(PETSC_USE_LOG)
1280   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1281 #endif
1282   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1283   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1284   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1285   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1286 #if defined (PETSC_USE_CTABLE)
1287   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1288 #else
1289   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1290 #endif
1291   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1292   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1293   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1294   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1295   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1296   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1297 
1298   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1312 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1313 {
1314   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1315   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1316   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1317   PetscErrorCode    ierr;
1318   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1319   int               fd;
1320   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1321   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1322   PetscScalar       *column_values;
1323   PetscInt          message_count,flowcontrolcount;
1324   FILE              *file;
1325 
1326   PetscFunctionBegin;
1327   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1328   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1329   nz   = A->nz + B->nz;
1330   if (!rank) {
1331     header[0] = MAT_FILE_CLASSID;
1332     header[1] = mat->rmap->N;
1333     header[2] = mat->cmap->N;
1334     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1335     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1336     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1337     /* get largest number of rows any processor has */
1338     rlen = mat->rmap->n;
1339     range = mat->rmap->range;
1340     for (i=1; i<size; i++) {
1341       rlen = PetscMax(rlen,range[i+1] - range[i]);
1342     }
1343   } else {
1344     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1345     rlen = mat->rmap->n;
1346   }
1347 
1348   /* load up the local row counts */
1349   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1350   for (i=0; i<mat->rmap->n; i++) {
1351     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1352   }
1353 
1354   /* store the row lengths to the file */
1355   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1356   if (!rank) {
1357     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1358     for (i=1; i<size; i++) {
1359       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1360       rlen = range[i+1] - range[i];
1361       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1362       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1363     }
1364     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1365   } else {
1366     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1367     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1368     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1369   }
1370   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1371 
1372   /* load up the local column indices */
1373   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
1374   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1375   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1376   cnt  = 0;
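  /* merge each row so that global column indices come out increasing: first the
     B (off-diagonal) columns that lie left of the diagonal block, then the A
     (diagonal-block) columns, then the remaining B columns; the value loop
     further below must use the same order */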
1377   for (i=0; i<mat->rmap->n; i++) {
1378     for (j=B->i[i]; j<B->i[i+1]; j++) {
1379       if ((col = garray[B->j[j]]) > cstart) break;
1380       column_indices[cnt++] = col;
1381     }
1382     for (k=A->i[i]; k<A->i[i+1]; k++) {
1383       column_indices[cnt++] = A->j[k] + cstart;
1384     }
1385     for (; j<B->i[i+1]; j++) {
1386       column_indices[cnt++] = garray[B->j[j]];
1387     }
1388   }
1389   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1390 
1391   /* store the column indices to the file */
1392   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1393   if (!rank) {
1394     MPI_Status status;
1395     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1396     for (i=1; i<size; i++) {
1397       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1398       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1399       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1400       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1401       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1402     }
1403     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1404   } else {
1405     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1406     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1407     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1408     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1409   }
1410   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1411 
1412   /* load up the local column values */
1413   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1414   cnt  = 0;
1415   for (i=0; i<mat->rmap->n; i++) {
1416     for (j=B->i[i]; j<B->i[i+1]; j++) {
1417       if (garray[B->j[j]] > cstart) break;
1418       column_values[cnt++] = B->a[j];
1419     }
1420     for (k=A->i[i]; k<A->i[i+1]; k++) {
1421       column_values[cnt++] = A->a[k];
1422     }
1423     for (; j<B->i[i+1]; j++) {
1424       column_values[cnt++] = B->a[j];
1425     }
1426   }
1427   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1428 
1429   /* store the column values to the file */
1430   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1431   if (!rank) {
1432     MPI_Status status;
1433     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1434     for (i=1; i<size; i++) {
1435       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1436       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1437       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1438       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1439       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1440     }
1441     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1442   } else {
1443     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1444     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1445     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1446     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1447   }
1448   ierr = PetscFree(column_values);CHKERRQ(ierr);
1449 
1450   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1451   if (file) {
1452     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1453   }
1454 
1455   PetscFunctionReturn(0);
1456 }
1457 
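/*
   Usage sketch (illustrative only, not part of the library source): the binary
   writer above is reached through the generic MatView() once a binary viewer
   is supplied; the file name "matrix.dat" is an arbitrary example.

     PetscErrorCode ierr;
     PetscViewer    viewer;
     ierr = PetscViewerBinaryOpen(((PetscObject)mat)->comm,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   The file can later be read back with MatLoad() on a FILE_MODE_READ viewer.
*/
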
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1460 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1461 {
1462   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1463   PetscErrorCode    ierr;
1464   PetscMPIInt       rank = aij->rank,size = aij->size;
1465   PetscBool         isdraw,iascii,isbinary;
1466   PetscViewer       sviewer;
1467   PetscViewerFormat format;
1468 
1469   PetscFunctionBegin;
1470   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1471   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1472   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1473   if (iascii) {
1474     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1475     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1476       MatInfo    info;
1477       PetscInt   *inodes;
1478 
1479       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1480       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1481       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,&inodes,PETSC_NULL);CHKERRQ(ierr);
1482       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1483       if (!inodes) {
1484         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1485                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1486       } else {
1487         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1488                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1489       }
1490       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1491       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1492       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1493       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1494       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1495       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1496       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1497       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1498       PetscFunctionReturn(0);
1499     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1500       PetscInt   inodecount,inodelimit,*inodes;
1501       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1502       if (inodes) {
1503         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1504       } else {
1505         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1506       }
1507       PetscFunctionReturn(0);
1508     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1509       PetscFunctionReturn(0);
1510     }
1511   } else if (isbinary) {
1512     if (size == 1) {
1513       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1514       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1515     } else {
1516       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1517     }
1518     PetscFunctionReturn(0);
1519   } else if (isdraw) {
1520     PetscDraw  draw;
1521     PetscBool  isnull;
1522     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1523     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1524   }
1525 
1526   if (size == 1) {
1527     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1528     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1529   } else {
1530     /* assemble the entire matrix onto first processor. */
1531     Mat         A;
1532     Mat_SeqAIJ  *Aloc;
1533     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1534     MatScalar   *a;
1535 
1536     if (mat->rmap->N > 1024) {
1537       PetscBool  flg = PETSC_FALSE;
1538 
1539       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1540       if (!flg) {
1541         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1542       }
1543     }
1544 
1545     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1546     if (!rank) {
1547       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1548     } else {
1549       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1550     }
1551     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1552     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1553     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1554     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1555     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1556 
1557     /* copy over the A part */
1558     Aloc = (Mat_SeqAIJ*)aij->A->data;
1559     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1560     row = mat->rmap->rstart;
1561     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1562     for (i=0; i<m; i++) {
1563       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1564       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1565     }
1566     aj = Aloc->j;
1567     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1568 
1569     /* copy over the B part */
1570     Aloc = (Mat_SeqAIJ*)aij->B->data;
1571     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1572     row  = mat->rmap->rstart;
1573     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1574     ct   = cols;
1575     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1576     for (i=0; i<m; i++) {
1577       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1578       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1579     }
1580     ierr = PetscFree(ct);CHKERRQ(ierr);
1581     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1582     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1583     /*
1584        Everyone has to call to draw the matrix since the graphics waits are
1585        synchronized across all processors that share the PetscDraw object
1586     */
1587     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1588     if (!rank) {
1589       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1590       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1591       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1592       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1593     }
1594     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1595     ierr = MatDestroy(&A);CHKERRQ(ierr);
1596   }
1597   PetscFunctionReturn(0);
1598 }
1599 
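/*
   Usage sketch (illustrative only): the PETSC_VIEWER_ASCII_INFO branches above
   are selected by setting a format on an ASCII viewer before calling MatView().

     PetscErrorCode ierr;
     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   The -mat_view_info runtime option typically produces the same output.
*/
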
1600 #undef __FUNCT__
1601 #define __FUNCT__ "MatView_MPIAIJ"
1602 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1603 {
1604   PetscErrorCode ierr;
1605   PetscBool      iascii,isdraw,issocket,isbinary;
1606 
1607   PetscFunctionBegin;
1608   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1609   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1610   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1611   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1612   if (iascii || isdraw || isbinary || issocket) {
1613     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1614   } else {
1615     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1616   }
1617   PetscFunctionReturn(0);
1618 }
1619 
1620 #undef __FUNCT__
1621 #define __FUNCT__ "MatSOR_MPIAIJ"
1622 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1623 {
1624   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1625   PetscErrorCode ierr;
1626   Vec            bb1 = 0;
1627   PetscBool      hasop;
1628 
1629   PetscFunctionBegin;
1630   if (flag == SOR_APPLY_UPPER) {
1631     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1632     PetscFunctionReturn(0);
1633   }
1634 
1635   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1636     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1637   }
1638 
1639   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1640     if (flag & SOR_ZERO_INITIAL_GUESS) {
1641       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1642       its--;
1643     }
1644 
1645     while (its--) {
1646       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1647       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1648 
1649       /* update rhs: bb1 = bb - B*x */
1650       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1651       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1652 
1653       /* local sweep */
1654       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1655     }
1656   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1657     if (flag & SOR_ZERO_INITIAL_GUESS) {
1658       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1659       its--;
1660     }
1661     while (its--) {
1662       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1663       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1664 
1665       /* update rhs: bb1 = bb - B*x */
1666       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1667       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1668 
1669       /* local sweep */
1670       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1671     }
1672   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1673     if (flag & SOR_ZERO_INITIAL_GUESS) {
1674       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1675       its--;
1676     }
1677     while (its--) {
1678       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1679       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1680 
1681       /* update rhs: bb1 = bb - B*x */
1682       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1683       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1684 
1685       /* local sweep */
1686       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1687     }
1688   }  else if (flag & SOR_EISENSTAT) {
1689     Vec         xx1;
1690 
1691     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1692     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1693 
1694     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1695     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1696     if (!mat->diag) {
1697       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1698       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1699     }
1700     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1701     if (hasop) {
1702       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1703     } else {
1704       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1705     }
1706     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1707 
1708     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1709 
1710     /* local sweep */
1711     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1712     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1713     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1714   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1715 
1716   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1717   PetscFunctionReturn(0);
1718 }
1719 
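/*
   Usage sketch (illustrative only): only the processor-local SOR_LOCAL_* sweeps
   and SOR_EISENSTAT are supported above; a true parallel sweep raises an error.
   One local symmetric sweep with omega = 1.0, overwriting xx:

     PetscErrorCode ierr;
     ierr = MatSOR(matin,bb,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,xx);CHKERRQ(ierr);

   The same kernel is what PCSOR applies; PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP)
   selects this variant inside a preconditioner.
*/
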
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatPermute_MPIAIJ"
1722 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1723 {
1724   Mat            aA,aB,Aperm;
1725   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1726   PetscScalar    *aa,*ba;
1727   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1728   PetscSF        rowsf,sf;
1729   IS             parcolp = PETSC_NULL;
1730   PetscBool      done;
1731   PetscErrorCode ierr;
1732 
1733   PetscFunctionBegin;
1734   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1735   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1736   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1737   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1738 
1739   /* Invert row permutation to find out where my rows should go */
1740   ierr = PetscSFCreate(((PetscObject)A)->comm,&rowsf);CHKERRQ(ierr);
1741   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,PETSC_NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1742   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1743   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1744   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1745 
1746   /* Invert column permutation to find out where my columns should go */
1747   ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
1748   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,PETSC_NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1749   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1750   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1751   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1752   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1753 
1754   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1755   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1756   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1757 
1758   /* Find out where my gcols should go */
1759   ierr = MatGetSize(aB,PETSC_NULL,&ng);CHKERRQ(ierr);
1760   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1761   ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
1762   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,PETSC_NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1763   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1764   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1765   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1766 
1767   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1768   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1769   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1770   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1771   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1772   for (i=0; i<m; i++) {
1773     PetscInt row = rdest[i],rowner;
1774     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1775     for (j=ai[i]; j<ai[i+1]; j++) {
1776       PetscInt cowner,col = cdest[aj[j]];
1777       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1778       if (rowner == cowner) dnnz[i]++;
1779       else onnz[i]++;
1780     }
1781     for (j=bi[i]; j<bi[i+1]; j++) {
1782       PetscInt cowner,col = gcdest[bj[j]];
1783       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1784       if (rowner == cowner) dnnz[i]++;
1785       else onnz[i]++;
1786     }
1787   }
1788   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1789   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1790   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1791   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1792   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1793   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1794   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1795 
1796   ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1797   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1798   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1799   for (i=0; i<m; i++) {
1800     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1801     PetscInt rowlen;
1802     rowlen = ai[i+1] - ai[i];
1803     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1804     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1805     rowlen = bi[i+1] - bi[i];
1806     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1807     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1808   }
1809   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1810   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1811   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1812   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1813   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1814   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1815   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1816   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1817   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1818   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1819   *B = Aperm;
1820   PetscFunctionReturn(0);
1821 }
1822 
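/*
   Usage sketch (illustrative only): rowp and colp are parallel index sets
   describing the row and column permutations. A trivial identity permutation,
   assuming a square A with matching row and column layouts:

     PetscErrorCode ierr;
     PetscInt       rstart,rend;
     IS             rowp;
     Mat            B;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = ISCreateStride(((PetscObject)A)->comm,rend-rstart,rstart,1,&rowp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,rowp,&B);CHKERRQ(ierr); /* B holds the same entries as A */
     ierr = ISDestroy(&rowp);CHKERRQ(ierr);
     ierr = MatDestroy(&B);CHKERRQ(ierr);
*/
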
1823 #undef __FUNCT__
1824 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1825 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1826 {
1827   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1828   Mat            A = mat->A,B = mat->B;
1829   PetscErrorCode ierr;
1830   PetscReal      isend[5],irecv[5];
1831 
1832   PetscFunctionBegin;
1833   info->block_size     = 1.0;
1834   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1835   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1836   isend[3] = info->memory;  isend[4] = info->mallocs;
1837   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1838   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1839   isend[3] += info->memory;  isend[4] += info->mallocs;
1840   if (flag == MAT_LOCAL) {
1841     info->nz_used      = isend[0];
1842     info->nz_allocated = isend[1];
1843     info->nz_unneeded  = isend[2];
1844     info->memory       = isend[3];
1845     info->mallocs      = isend[4];
1846   } else if (flag == MAT_GLOBAL_MAX) {
1847     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1848     info->nz_used      = irecv[0];
1849     info->nz_allocated = irecv[1];
1850     info->nz_unneeded  = irecv[2];
1851     info->memory       = irecv[3];
1852     info->mallocs      = irecv[4];
1853   } else if (flag == MAT_GLOBAL_SUM) {
1854     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1855     info->nz_used      = irecv[0];
1856     info->nz_allocated = irecv[1];
1857     info->nz_unneeded  = irecv[2];
1858     info->memory       = irecv[3];
1859     info->mallocs      = irecv[4];
1860   }
1861   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1862   info->fill_ratio_needed = 0;
1863   info->factor_mallocs    = 0;
1864   PetscFunctionReturn(0);
1865 }
1866 
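/*
   Usage sketch (illustrative only): querying the globally summed statistics
   that the routine above reduces over the communicator.

     PetscErrorCode ierr;
     MatInfo        info;
     ierr = MatGetInfo(matin,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(((PetscObject)matin)->comm,"nz used %g  nz allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/
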
1867 #undef __FUNCT__
1868 #define __FUNCT__ "MatSetOption_MPIAIJ"
1869 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1870 {
1871   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1872   PetscErrorCode ierr;
1873 
1874   PetscFunctionBegin;
1875   switch (op) {
1876   case MAT_NEW_NONZERO_LOCATIONS:
1877   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1878   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1879   case MAT_KEEP_NONZERO_PATTERN:
1880   case MAT_NEW_NONZERO_LOCATION_ERR:
1881   case MAT_USE_INODES:
1882   case MAT_IGNORE_ZERO_ENTRIES:
1883     MatCheckPreallocated(A,1);
1884     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1885     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1886     break;
1887   case MAT_ROW_ORIENTED:
1888     a->roworiented = flg;
1889     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1890     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1891     break;
1892   case MAT_NEW_DIAGONALS:
1893     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1894     break;
1895   case MAT_IGNORE_OFF_PROC_ENTRIES:
1896     a->donotstash = flg;
1897     break;
1898   case MAT_SPD:
1899     A->spd_set                         = PETSC_TRUE;
1900     A->spd                             = flg;
1901     if (flg) {
1902       A->symmetric                     = PETSC_TRUE;
1903       A->structurally_symmetric        = PETSC_TRUE;
1904       A->symmetric_set                 = PETSC_TRUE;
1905       A->structurally_symmetric_set    = PETSC_TRUE;
1906     }
1907     break;
1908   case MAT_SYMMETRIC:
1909     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1910     break;
1911   case MAT_STRUCTURALLY_SYMMETRIC:
1912     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1913     break;
1914   case MAT_HERMITIAN:
1915     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1916     break;
1917   case MAT_SYMMETRY_ETERNAL:
1918     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1919     break;
1920   default:
1921     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1922   }
1923   PetscFunctionReturn(0);
1924 }
1925 
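/*
   Usage sketch (illustrative only): MAT_IGNORE_OFF_PROC_ENTRIES, handled above
   by setting a->donotstash, skips the communication of stashed values during
   assembly when every process sets only entries in rows it owns.

     PetscErrorCode ierr;
     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     /* ... MatSetValues() restricted to locally owned rows, then MatAssemblyBegin/End() ... */
*/
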
1926 #undef __FUNCT__
1927 #define __FUNCT__ "MatGetRow_MPIAIJ"
1928 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1929 {
1930   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1931   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1932   PetscErrorCode ierr;
1933   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1934   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1935   PetscInt       *cmap,*idx_p;
1936 
1937   PetscFunctionBegin;
1938   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1939   mat->getrowactive = PETSC_TRUE;
1940 
1941   if (!mat->rowvalues && (idx || v)) {
1942     /*
1943         allocate enough space to hold information from the longest row.
1944     */
1945     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1946     PetscInt   max = 1,tmp;
1947     for (i=0; i<matin->rmap->n; i++) {
1948       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1949       if (max < tmp) { max = tmp; }
1950     }
1951     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1952   }
1953 
1954   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1955   lrow = row - rstart;
1956 
1957   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1958   if (!v)   {pvA = 0; pvB = 0;}
1959   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1960   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1961   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1962   nztot = nzA + nzB;
1963 
1964   cmap  = mat->garray;
1965   if (v  || idx) {
1966     if (nztot) {
1967       /* Sort by increasing column numbers, assuming A and B already sorted */
1968       PetscInt imark = -1;
1969       if (v) {
1970         *v = v_p = mat->rowvalues;
1971         for (i=0; i<nzB; i++) {
1972           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1973           else break;
1974         }
1975         imark = i;
1976         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1977         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1978       }
1979       if (idx) {
1980         *idx = idx_p = mat->rowindices;
1981         if (imark > -1) {
1982           for (i=0; i<imark; i++) {
1983             idx_p[i] = cmap[cworkB[i]];
1984           }
1985         } else {
1986           for (i=0; i<nzB; i++) {
1987             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1988             else break;
1989           }
1990           imark = i;
1991         }
1992         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1993         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1994       }
1995     } else {
1996       if (idx) *idx = 0;
1997       if (v)   *v   = 0;
1998     }
1999   }
2000   *nz = nztot;
2001   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
2002   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
2003   PetscFunctionReturn(0);
2004 }
2005 
2006 #undef __FUNCT__
2007 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
2008 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2009 {
2010   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2011 
2012   PetscFunctionBegin;
2013   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2014   aij->getrowactive = PETSC_FALSE;
2015   PetscFunctionReturn(0);
2016 }
2017 
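/*
   Usage sketch (illustrative only): the two routines above back the public
   MatGetRow()/MatRestoreRow() pair, which may only be asked for locally owned
   rows, one row at a time.

     PetscErrorCode    ierr;
     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       /* cols[0..ncols-1] are global column indices in increasing order */
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/
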
2018 #undef __FUNCT__
2019 #define __FUNCT__ "MatNorm_MPIAIJ"
2020 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2021 {
2022   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2023   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2024   PetscErrorCode ierr;
2025   PetscInt       i,j,cstart = mat->cmap->rstart;
2026   PetscReal      sum = 0.0;
2027   MatScalar      *v;
2028 
2029   PetscFunctionBegin;
2030   if (aij->size == 1) {
2031     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2032   } else {
2033     if (type == NORM_FROBENIUS) {
2034       v = amat->a;
2035       for (i=0; i<amat->nz; i++) {
2036 #if defined(PETSC_USE_COMPLEX)
2037         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2038 #else
2039         sum += (*v)*(*v); v++;
2040 #endif
2041       }
2042       v = bmat->a;
2043       for (i=0; i<bmat->nz; i++) {
2044 #if defined(PETSC_USE_COMPLEX)
2045         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2046 #else
2047         sum += (*v)*(*v); v++;
2048 #endif
2049       }
2050       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2051       *norm = PetscSqrtReal(*norm);
2052     } else if (type == NORM_1) { /* max column norm */
2053       PetscReal *tmp,*tmp2;
2054       PetscInt  *jj,*garray = aij->garray;
2055       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2056       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2057       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2058       *norm = 0.0;
2059       v = amat->a; jj = amat->j;
2060       for (j=0; j<amat->nz; j++) {
2061         tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
2062       }
2063       v = bmat->a; jj = bmat->j;
2064       for (j=0; j<bmat->nz; j++) {
2065         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2066       }
2067       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2068       for (j=0; j<mat->cmap->N; j++) {
2069         if (tmp2[j] > *norm) *norm = tmp2[j];
2070       }
2071       ierr = PetscFree(tmp);CHKERRQ(ierr);
2072       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2073     } else if (type == NORM_INFINITY) { /* max row norm */
2074       PetscReal ntemp = 0.0;
2075       for (j=0; j<aij->A->rmap->n; j++) {
2076         v = amat->a + amat->i[j];
2077         sum = 0.0;
2078         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2079           sum += PetscAbsScalar(*v); v++;
2080         }
2081         v = bmat->a + bmat->i[j];
2082         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2083           sum += PetscAbsScalar(*v); v++;
2084         }
2085         if (sum > ntemp) ntemp = sum;
2086       }
2087       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2088     } else {
2089       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2090     }
2091   }
2092   PetscFunctionReturn(0);
2093 }
2094 
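/*
   Usage sketch (illustrative only): the three norms supported above.

     PetscErrorCode ierr;
     PetscReal      nrm;
     ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr); /* sqrt of the global sum of |a_ij|^2 */
     ierr = MatNorm(mat,NORM_1,&nrm);CHKERRQ(ierr);         /* largest column sum of absolute values */
     ierr = MatNorm(mat,NORM_INFINITY,&nrm);CHKERRQ(ierr);  /* largest row sum of absolute values */
*/
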
2095 #undef __FUNCT__
2096 #define __FUNCT__ "MatTranspose_MPIAIJ"
2097 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2098 {
2099   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2100   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2101   PetscErrorCode ierr;
2102   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2103   PetscInt       cstart=A->cmap->rstart,ncol;
2104   Mat            B;
2105   MatScalar      *array;
2106 
2107   PetscFunctionBegin;
2108   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2109 
2110   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2111   ai = Aloc->i; aj = Aloc->j;
2112   bi = Bloc->i; bj = Bloc->j;
2113   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2114     PetscInt *d_nnz,*g_nnz,*o_nnz;
2115     PetscSFNode *oloc;
2116     PETSC_UNUSED PetscSF sf;
2117 
2118     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2119     /* compute d_nnz for preallocation */
2120     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2121     for (i=0; i<ai[ma]; i++) {
2122       d_nnz[aj[i]] ++;
2123       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2124     }
2125 #if defined(OMPI_MAJOR_VERSION) /* Open MPI Bug. https://bitbucket.org/petsc/petsc-dev/issue/9/implement-petscsf-without-one-sided */
2126     ierr = PetscMemcpy(o_nnz,d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2127 #else
2128     /* compute local off-diagonal contributions */
2129     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2130     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2131     /* map those to global */
2132     ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
2133     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,PETSC_NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2134     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2135     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2136     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2137     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2138 #endif
2139 
2140     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2141     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2142     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2143     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2144     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2145     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2146 #if defined(OMPI_MAJOR_VERSION)
2147     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
2148 #endif
2149   } else {
2150     B = *matout;
2151     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2152     for (i=0; i<ai[ma]; i++){
2153       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2154     }
2155   }
2156 
2157   /* copy over the A part */
2158   array = Aloc->a;
2159   row = A->rmap->rstart;
2160   for (i=0; i<ma; i++) {
2161     ncol = ai[i+1]-ai[i];
2162     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2163     row++; array += ncol; aj += ncol;
2164   }
2165   aj = Aloc->j;
2166   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
2167 
2168   /* copy over the B part */
2169   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2170   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2171   array = Bloc->a;
2172   row = A->rmap->rstart;
2173   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2174   cols_tmp = cols;
2175   for (i=0; i<mb; i++) {
2176     ncol = bi[i+1]-bi[i];
2177     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2178     row++; array += ncol; cols_tmp += ncol;
2179   }
2180   ierr = PetscFree(cols);CHKERRQ(ierr);
2181 
2182   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2183   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2184   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2185     *matout = B;
2186   } else {
2187     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2188   }
2189   PetscFunctionReturn(0);
2190 }
2191 
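/*
   Usage sketch (illustrative only): both reuse modes handled above.

     PetscErrorCode ierr;
     Mat            At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); /* allocates a new matrix */
     ierr = MatDestroy(&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);    /* in place; A must be square */
*/
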
2192 #undef __FUNCT__
2193 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2194 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2195 {
2196   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2197   Mat            a = aij->A,b = aij->B;
2198   PetscErrorCode ierr;
2199   PetscInt       s1,s2,s3;
2200 
2201   PetscFunctionBegin;
2202   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2203   if (rr) {
2204     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2205     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2206     /* Overlap communication with computation. */
2207     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2208   }
2209   if (ll) {
2210     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2211     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2212     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2213   }
2214   /* scale  the diagonal block */
2215   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2216 
2217   if (rr) {
2218     /* Do a scatter end and then right scale the off-diagonal block */
2219     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2220     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2221   }
2222 
2223   PetscFunctionReturn(0);
2224 }
2225 
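/*
   Usage sketch (illustrative only): mat <- diag(ll) * mat * diag(rr); either
   vector may be PETSC_NULL to skip that side.  ll must match the row layout
   and rr the column layout of mat.

     PetscErrorCode ierr;
     Vec            l,r;
     ierr = MatGetVecs(mat,&r,&l);CHKERRQ(ierr); /* r: column layout, l: row layout */
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(mat,l,r);CHKERRQ(ierr);
     ierr = VecDestroy(&l);CHKERRQ(ierr);
     ierr = VecDestroy(&r);CHKERRQ(ierr);
*/
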
2226 #undef __FUNCT__
2227 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2228 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2229 {
2230   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2231   PetscErrorCode ierr;
2232 
2233   PetscFunctionBegin;
2234   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2235   PetscFunctionReturn(0);
2236 }
2237 
2238 #undef __FUNCT__
2239 #define __FUNCT__ "MatEqual_MPIAIJ"
2240 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2241 {
2242   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2243   Mat            a,b,c,d;
2244   PetscBool      flg;
2245   PetscErrorCode ierr;
2246 
2247   PetscFunctionBegin;
2248   a = matA->A; b = matA->B;
2249   c = matB->A; d = matB->B;
2250 
2251   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2252   if (flg) {
2253     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2254   }
2255   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2256   PetscFunctionReturn(0);
2257 }
2258 
2259 #undef __FUNCT__
2260 #define __FUNCT__ "MatCopy_MPIAIJ"
2261 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2262 {
2263   PetscErrorCode ierr;
2264   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2265   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2266 
2267   PetscFunctionBegin;
2268   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2269   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2270     /* Because of the column compression in the off-processor part of the matrix a->B,
2271        the number of columns in a->B and b->B may differ, hence we cannot call
2272        MatCopy() directly on the two parts. If need be, a copy more efficient than
2273        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2274        and then copying the submatrices. */
2275     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2276   } else {
2277     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2278     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2279   }
2280   PetscFunctionReturn(0);
2281 }
2282 
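/*
   Usage sketch (illustrative only): when B shares A's nonzero pattern (e.g. it
   came from MatDuplicate()), the fast per-block path above is taken; otherwise
   MatCopy_Basic() is used.

     PetscErrorCode ierr;
     Mat            B;
     ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = MatDestroy(&B);CHKERRQ(ierr);
*/
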
2283 #undef __FUNCT__
2284 #define __FUNCT__ "MatSetUp_MPIAIJ"
2285 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2286 {
2287   PetscErrorCode ierr;
2288 
2289   PetscFunctionBegin;
2290   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2296 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2297 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2298 {
2299   PetscInt          i,m=Y->rmap->N;
2300   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2301   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2302   const PetscInt    *xi = x->i,*yi = y->i;
2303 
2304   PetscFunctionBegin;
2305   /* Set the number of nonzeros in the new matrix */
2306   for (i=0; i<m; i++) {
2307     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2308     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2309     nnz[i] = 0;
2310     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2311       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2312       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2313       nnz[i]++;
2314     }
2315     for (; k<nzy; k++) nnz[i]++;
2316   }
2317   PetscFunctionReturn(0);
2318 }
2319 
2320 #undef __FUNCT__
2321 #define __FUNCT__ "MatAXPY_MPIAIJ"
2322 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2323 {
2324   PetscErrorCode ierr;
2325   PetscInt       i;
2326   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2327   PetscBLASInt   bnz,one=1;
2328   Mat_SeqAIJ     *x,*y;
2329 
2330   PetscFunctionBegin;
2331   if (str == SAME_NONZERO_PATTERN) {
2332     PetscScalar alpha = a;
2333     x = (Mat_SeqAIJ *)xx->A->data;
2334     y = (Mat_SeqAIJ *)yy->A->data;
2335     bnz = PetscBLASIntCast(x->nz);
2336     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2337     x = (Mat_SeqAIJ *)xx->B->data;
2338     y = (Mat_SeqAIJ *)yy->B->data;
2339     bnz = PetscBLASIntCast(x->nz);
2340     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2341   } else if (str == SUBSET_NONZERO_PATTERN) {
2342     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2343 
2344     x = (Mat_SeqAIJ *)xx->B->data;
2345     y = (Mat_SeqAIJ *)yy->B->data;
2346     if (y->xtoy && y->XtoY != xx->B) {
2347       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2348       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2349     }
2350     if (!y->xtoy) { /* get xtoy */
2351       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2352       y->XtoY = xx->B;
2353       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2354     }
2355     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2356   } else {
2357     Mat B;
2358     PetscInt *nnz_d,*nnz_o;
2359     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2360     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2361     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2362     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2363     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2364     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2365     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2366     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2367     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2368     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2369     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2370     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2371     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2372     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2373   }
2374   PetscFunctionReturn(0);
2375 }
2376 
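/*
   Usage sketch (illustrative only): Y <- a*X + Y, with the MatStructure flag
   selecting among the three branches above.

     PetscErrorCode ierr;
     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); /* safest; may rebuild Y's pattern */
*/
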
2377 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2378 
2379 #undef __FUNCT__
2380 #define __FUNCT__ "MatConjugate_MPIAIJ"
2381 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2382 {
2383 #if defined(PETSC_USE_COMPLEX)
2384   PetscErrorCode ierr;
2385   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2386 
2387   PetscFunctionBegin;
2388   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2389   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2390 #else
2391   PetscFunctionBegin;
2392 #endif
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #undef __FUNCT__
2397 #define __FUNCT__ "MatRealPart_MPIAIJ"
2398 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2399 {
2400   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2401   PetscErrorCode ierr;
2402 
2403   PetscFunctionBegin;
2404   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2405   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2406   PetscFunctionReturn(0);
2407 }
2408 
2409 #undef __FUNCT__
2410 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2411 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2412 {
2413   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2414   PetscErrorCode ierr;
2415 
2416   PetscFunctionBegin;
2417   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2418   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2419   PetscFunctionReturn(0);
2420 }
2421 
2422 #if defined(PETSC_HAVE_PBGL)
2423 
2424 #include <boost/parallel/mpi/bsp_process_group.hpp>
2425 #include <boost/graph/distributed/ilu_default_graph.hpp>
2426 #include <boost/graph/distributed/ilu_0_block.hpp>
2427 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2428 #include <boost/graph/distributed/petsc/interface.hpp>
2429 #include <boost/multi_array.hpp>
2430 #include <boost/parallel/distributed_property_map.hpp>
2431 
2432 #undef __FUNCT__
2433 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2434 /*
2435   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2436 */
2437 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2438 {
2439   namespace petsc = boost::distributed::petsc;
2440 
2441   namespace graph_dist = boost::graph::distributed;
2442   using boost::graph::distributed::ilu_default::process_group_type;
2443   using boost::graph::ilu_permuted;
2444 
2445   PetscBool       row_identity, col_identity;
2446   PetscContainer  c;
2447   PetscInt        m, n, M, N;
2448   PetscErrorCode  ierr;
2449 
2450   PetscFunctionBegin;
2451   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2452   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2453   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2454   if (!row_identity || !col_identity) {
2455     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2456   }
2457 
2458   process_group_type pg;
2459   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2460   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2461   lgraph_type&   level_graph = *lgraph_p;
2462   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2463 
2464   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2465   ilu_permuted(level_graph);
2466 
2467   /* put together the new matrix */
2468   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2469   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2470   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2471   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2472   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2473   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2474   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2475   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2476 
2477   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2478   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2479   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2480   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2481   PetscFunctionReturn(0);
2482 }
2483 
2484 #undef __FUNCT__
2485 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2486 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2487 {
2488   PetscFunctionBegin;
2489   PetscFunctionReturn(0);
2490 }
2491 
2492 #undef __FUNCT__
2493 #define __FUNCT__ "MatSolve_MPIAIJ"
2494 /*
2495   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2496 */
2497 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2498 {
2499   namespace graph_dist = boost::graph::distributed;
2500 
2501   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2502   lgraph_type*   lgraph_p;
2503   PetscContainer c;
2504   PetscErrorCode ierr;
2505 
2506   PetscFunctionBegin;
2507   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2508   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2509   ierr = VecCopy(b, x);CHKERRQ(ierr);
2510 
2511   PetscScalar* array_x;
2512   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2513   PetscInt sx;
2514   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2515 
2516   PetscScalar* array_b;
2517   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2518   PetscInt sb;
2519   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2520 
2521   lgraph_type&   level_graph = *lgraph_p;
2522   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2523 
2524   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2525   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2526                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2527 
2528   typedef boost::iterator_property_map<array_ref_type::iterator,
2529                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2530   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2531                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2532 
2533   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2534 
2535   PetscFunctionReturn(0);
2536 }
2537 #endif
2538 
2539 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2540   PetscInt       nzlocal,nsends,nrecvs;
2541   PetscMPIInt    *send_rank,*recv_rank;
2542   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2543   PetscScalar    *sbuf_a,**rbuf_a;
2544   PetscErrorCode (*Destroy)(Mat);
2545 } Mat_Redundant;
2546 
2547 #undef __FUNCT__
2548 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2549 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2550 {
2551   PetscErrorCode       ierr;
2552   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2553   PetscInt             i;
2554 
2555   PetscFunctionBegin;
2556   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2557   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2558   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2559   for (i=0; i<redund->nrecvs; i++){
2560     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2561     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2562   }
2563   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2564   ierr = PetscFree(redund);CHKERRQ(ierr);
2565   PetscFunctionReturn(0);
2566 }
2567 
2568 #undef __FUNCT__
2569 #define __FUNCT__ "MatDestroy_MatRedundant"
2570 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2571 {
2572   PetscErrorCode  ierr;
2573   PetscContainer  container;
2574   Mat_Redundant   *redund=PETSC_NULL;
2575 
2576   PetscFunctionBegin;
2577   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2578   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2579   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2580   A->ops->destroy = redund->Destroy;
2581   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2582   if (A->ops->destroy) {
2583     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2584   }
2585   PetscFunctionReturn(0);
2586 }
2587 
2588 #undef __FUNCT__
2589 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2590 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2591 {
2592   PetscMPIInt    rank,size;
2593   MPI_Comm       comm=((PetscObject)mat)->comm;
2594   PetscErrorCode ierr;
2595   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2596   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2597   PetscInt       *rowrange=mat->rmap->range;
2598   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2599   Mat            A=aij->A,B=aij->B,C=*matredundant;
2600   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2601   PetscScalar    *sbuf_a;
2602   PetscInt       nzlocal=a->nz+b->nz;
2603   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2604   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2605   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2606   MatScalar      *aworkA,*aworkB;
2607   PetscScalar    *vals;
2608   PetscMPIInt    tag1,tag2,tag3,imdex;
2609   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2610                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2611   MPI_Status     recv_status,*send_status;
2612   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2613   PetscInt       **rbuf_j=PETSC_NULL;
2614   PetscScalar    **rbuf_a=PETSC_NULL;
2615   Mat_Redundant  *redund=PETSC_NULL;
2616   PetscContainer container;
2617 
2618   PetscFunctionBegin;
2619   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2620   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2621 
2622   if (reuse == MAT_REUSE_MATRIX) {
2623     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2624     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2625     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2626     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2627     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2628     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2629     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2630     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2631 
2632     nsends    = redund->nsends;
2633     nrecvs    = redund->nrecvs;
2634     send_rank = redund->send_rank;
2635     recv_rank = redund->recv_rank;
2636     sbuf_nz   = redund->sbuf_nz;
2637     rbuf_nz   = redund->rbuf_nz;
2638     sbuf_j    = redund->sbuf_j;
2639     sbuf_a    = redund->sbuf_a;
2640     rbuf_j    = redund->rbuf_j;
2641     rbuf_a    = redund->rbuf_a;
2642   }
2643 
2644   if (reuse == MAT_INITIAL_MATRIX){
2645     PetscMPIInt  subrank,subsize;
2646     PetscInt     nleftover,np_subcomm;
2647     /* get the destination processors' id send_rank, nsends and nrecvs */
2648     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2649     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2650     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2651     np_subcomm = size/nsubcomm;
2652     nleftover  = size - nsubcomm*np_subcomm;
2653     nsends = 0; nrecvs = 0;
2654     for (i=0; i<size; i++){ /* i=rank*/
2655       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2656         send_rank[nsends] = i; nsends++;
2657         recv_rank[nrecvs++] = i;
2658       }
2659     }
2660     if (rank >= size - nleftover){/* this proc is a leftover processor */
2661       i = size-nleftover-1;
2662       j = 0;
2663       while (j < nsubcomm - nleftover){
2664         send_rank[nsends++] = i;
2665         i--; j++;
2666       }
2667     }
2668 
2669     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2670       for (i=0; i<nleftover; i++){
2671         recv_rank[nrecvs++] = size-nleftover+i;
2672       }
2673     }
2674 
2675     /* allocate sbuf_j, sbuf_a */
2676     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2677     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2678     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2679   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2680 
2681   /* copy mat's local entries into the buffers */
2682   if (reuse == MAT_INITIAL_MATRIX){
2683     rownz_max = 0;
2684     rptr = sbuf_j;
2685     cols = sbuf_j + rend-rstart + 1;
2686     vals = sbuf_a;
2687     rptr[0] = 0;
2688     for (i=0; i<rend-rstart; i++){
2689       row = i + rstart;
2690       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2691       ncols  = nzA + nzB;
2692       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2693       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2694       /* load the column indices for this row into cols */
2695       lwrite = 0;
2696       for (l=0; l<nzB; l++) {
2697         if ((ctmp = bmap[cworkB[l]]) < cstart){
2698           vals[lwrite]   = aworkB[l];
2699           cols[lwrite++] = ctmp;
2700         }
2701       }
2702       for (l=0; l<nzA; l++){
2703         vals[lwrite]   = aworkA[l];
2704         cols[lwrite++] = cstart + cworkA[l];
2705       }
2706       for (l=0; l<nzB; l++) {
2707         if ((ctmp = bmap[cworkB[l]]) >= cend){
2708           vals[lwrite]   = aworkB[l];
2709           cols[lwrite++] = ctmp;
2710         }
2711       }
2712       vals += ncols;
2713       cols += ncols;
2714       rptr[i+1] = rptr[i] + ncols;
2715       if (rownz_max < ncols) rownz_max = ncols;
2716     }
2717     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2718   } else { /* only copy matrix values into sbuf_a */
2719     rptr = sbuf_j;
2720     vals = sbuf_a;
2721     rptr[0] = 0;
2722     for (i=0; i<rend-rstart; i++){
2723       row = i + rstart;
2724       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2725       ncols  = nzA + nzB;
2726       cworkB = b->j + b->i[i];
2727       aworkA = a->a + a->i[i];
2728       aworkB = b->a + b->i[i];
2729       lwrite = 0;
2730       for (l=0; l<nzB; l++) {
2731         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2732       }
2733       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2734       for (l=0; l<nzB; l++) {
2735         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2736       }
2737       vals += ncols;
2738       rptr[i+1] = rptr[i] + ncols;
2739     }
2740   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2741 
2742   /* send nzlocal to others, and recv other's nzlocal */
2743   /*--------------------------------------------------*/
2744   if (reuse == MAT_INITIAL_MATRIX){
2745     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2746     s_waits2 = s_waits3 + nsends;
2747     s_waits1 = s_waits2 + nsends;
2748     r_waits1 = s_waits1 + nsends;
2749     r_waits2 = r_waits1 + nrecvs;
2750     r_waits3 = r_waits2 + nrecvs;
2751   } else {
2752     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2753     r_waits3 = s_waits3 + nsends;
2754   }
2755 
2756   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2757   if (reuse == MAT_INITIAL_MATRIX){
2758     /* get new tags to keep the communication clean */
2759     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2760     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2761     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2762 
2763     /* post receives of other's nzlocal */
2764     for (i=0; i<nrecvs; i++){
2765       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2766     }
2767     /* send nzlocal to others */
2768     for (i=0; i<nsends; i++){
2769       sbuf_nz[i] = nzlocal;
2770       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2771     }
2772     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2773     count = nrecvs;
2774     while (count) {
2775       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2776       recv_rank[imdex] = recv_status.MPI_SOURCE;
2777       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2778       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2779 
2780       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2781       rbuf_nz[imdex] += i + 2;
2782       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2783       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2784       count--;
2785     }
2786     /* wait on sends of nzlocal */
2787     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2788     /* send mat->i,j to others, and recv from other's */
2789     /*------------------------------------------------*/
2790     for (i=0; i<nsends; i++){
2791       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2792       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2793     }
2794     /* wait on receives of mat->i,j */
2795     /*------------------------------*/
2796     count = nrecvs;
2797     while (count) {
2798       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2799       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2800       count--;
2801     }
2802     /* wait on sends of mat->i,j */
2803     /*---------------------------*/
2804     if (nsends) {
2805       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2806     }
2807   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2808 
2809   /* post receives, send and receive mat->a */
2810   /*----------------------------------------*/
2811   for (imdex=0; imdex<nrecvs; imdex++) {
2812     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2813   }
2814   for (i=0; i<nsends; i++){
2815     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2816   }
2817   count = nrecvs;
2818   while (count) {
2819     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2820     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2821     count--;
2822   }
2823   if (nsends) {
2824     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2825   }
2826 
2827   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2828 
2829   /* create redundant matrix */
2830   /*-------------------------*/
2831   if (reuse == MAT_INITIAL_MATRIX){
2832     /* compute rownz_max for preallocation */
2833     for (imdex=0; imdex<nrecvs; imdex++){
2834       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2835       rptr = rbuf_j[imdex];
2836       for (i=0; i<j; i++){
2837         ncols = rptr[i+1] - rptr[i];
2838         if (rownz_max < ncols) rownz_max = ncols;
2839       }
2840     }
2841 
2842     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2843     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2844     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2845     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2846     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2847     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2848   } else {
2849     C = *matredundant;
2850   }
2851 
2852   /* insert local matrix entries */
2853   rptr = sbuf_j;
2854   cols = sbuf_j + rend-rstart + 1;
2855   vals = sbuf_a;
2856   for (i=0; i<rend-rstart; i++){
2857     row   = i + rstart;
2858     ncols = rptr[i+1] - rptr[i];
2859     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2860     vals += ncols;
2861     cols += ncols;
2862   }
2863   /* insert received matrix entries */
2864   for (imdex=0; imdex<nrecvs; imdex++){
2865     rstart = rowrange[recv_rank[imdex]];
2866     rend   = rowrange[recv_rank[imdex]+1];
2867     rptr = rbuf_j[imdex];
2868     cols = rbuf_j[imdex] + rend-rstart + 1;
2869     vals = rbuf_a[imdex];
2870     for (i=0; i<rend-rstart; i++){
2871       row   = i + rstart;
2872       ncols = rptr[i+1] - rptr[i];
2873       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2874       vals += ncols;
2875       cols += ncols;
2876     }
2877   }
2878   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2879   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2880   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2881   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2882   if (reuse == MAT_INITIAL_MATRIX) {
2883     PetscContainer container;
2884     *matredundant = C;
2885     /* create a supporting struct and attach it to C for reuse */
2886     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2887     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2888     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2889     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2890     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2891     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2892 
2893     redund->nzlocal = nzlocal;
2894     redund->nsends  = nsends;
2895     redund->nrecvs  = nrecvs;
2896     redund->send_rank = send_rank;
2897     redund->recv_rank = recv_rank;
2898     redund->sbuf_nz = sbuf_nz;
2899     redund->rbuf_nz = rbuf_nz;
2900     redund->sbuf_j  = sbuf_j;
2901     redund->sbuf_a  = sbuf_a;
2902     redund->rbuf_j  = rbuf_j;
2903     redund->rbuf_a  = rbuf_a;
2904 
2905     redund->Destroy = C->ops->destroy;
2906     C->ops->destroy = MatDestroy_MatRedundant;
2907   }
2908   PetscFunctionReturn(0);
2909 }
2910 
2911 #undef __FUNCT__
2912 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2913 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2914 {
2915   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2916   PetscErrorCode ierr;
2917   PetscInt       i,*idxb = 0;
2918   PetscScalar    *va,*vb;
2919   Vec            vtmp;
2920 
2921   PetscFunctionBegin;
2922   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2923   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2924   if (idx) {
2925     for (i=0; i<A->rmap->n; i++) {
2926       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2927     }
2928   }
2929 
2930   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2931   if (idx) {
2932     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2933   }
2934   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2935   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2936 
2937   for (i=0; i<A->rmap->n; i++){
2938     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2939       va[i] = vb[i];
2940       if (idx) idx[i] = a->garray[idxb[i]];
2941     }
2942   }
2943 
2944   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2945   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2946   ierr = PetscFree(idxb);CHKERRQ(ierr);
2947   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2948   PetscFunctionReturn(0);
2949 }
2950 
2951 #undef __FUNCT__
2952 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2953 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2954 {
2955   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2956   PetscErrorCode ierr;
2957   PetscInt       i,*idxb = 0;
2958   PetscScalar    *va,*vb;
2959   Vec            vtmp;
2960 
2961   PetscFunctionBegin;
2962   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2963   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2964   if (idx) {
2965     for (i=0; i<A->rmap->n; i++) {
2966       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2967     }
2968   }
2969 
2970   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2971   if (idx) {
2972     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2973   }
2974   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2975   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2976 
2977   for (i=0; i<A->rmap->n; i++){
2978     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2979       va[i] = vb[i];
2980       if (idx) idx[i] = a->garray[idxb[i]];
2981     }
2982   }
2983 
2984   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2985   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2986   ierr = PetscFree(idxb);CHKERRQ(ierr);
2987   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2988   PetscFunctionReturn(0);
2989 }
2990 
2991 #undef __FUNCT__
2992 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2993 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2994 {
2995   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2996   PetscInt       n      = A->rmap->n;
2997   PetscInt       cstart = A->cmap->rstart;
2998   PetscInt      *cmap   = mat->garray;
2999   PetscInt      *diagIdx, *offdiagIdx;
3000   Vec            diagV, offdiagV;
3001   PetscScalar   *a, *diagA, *offdiagA;
3002   PetscInt       r;
3003   PetscErrorCode ierr;
3004 
3005   PetscFunctionBegin;
3006   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3007   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3008   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3009   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3010   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3011   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3012   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3013   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3014   for (r = 0; r < n; ++r) {
3015     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3016       a[r]   = diagA[r];
3017       idx[r] = cstart + diagIdx[r];
3018     } else {
3019       a[r]   = offdiagA[r];
3020       idx[r] = cmap[offdiagIdx[r]];
3021     }
3022   }
3023   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3024   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3025   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3026   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3027   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3028   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3029   PetscFunctionReturn(0);
3030 }
3031 
3032 #undef __FUNCT__
3033 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3034 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3035 {
3036   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
3037   PetscInt       n      = A->rmap->n;
3038   PetscInt       cstart = A->cmap->rstart;
3039   PetscInt      *cmap   = mat->garray;
3040   PetscInt      *diagIdx, *offdiagIdx;
3041   Vec            diagV, offdiagV;
3042   PetscScalar   *a, *diagA, *offdiagA;
3043   PetscInt       r;
3044   PetscErrorCode ierr;
3045 
3046   PetscFunctionBegin;
3047   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3048   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3049   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3050   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3051   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3052   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3053   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3054   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3055   for (r = 0; r < n; ++r) {
3056     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3057       a[r]   = diagA[r];
3058       idx[r] = cstart + diagIdx[r];
3059     } else {
3060       a[r]   = offdiagA[r];
3061       idx[r] = cmap[offdiagIdx[r]];
3062     }
3063   }
3064   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3065   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3066   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3067   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3068   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3069   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3070   PetscFunctionReturn(0);
3071 }
3072 
3073 #undef __FUNCT__
3074 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3075 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3076 {
3077   PetscErrorCode ierr;
3078   Mat            *dummy;
3079 
3080   PetscFunctionBegin;
3081   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3082   *newmat = *dummy;
3083   ierr = PetscFree(dummy);CHKERRQ(ierr);
3084   PetscFunctionReturn(0);
3085 }
3086 
3087 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3088 
3089 #undef __FUNCT__
3090 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3091 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3092 {
3093   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
3094   PetscErrorCode ierr;
3095 
3096   PetscFunctionBegin;
3097   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3098   PetscFunctionReturn(0);
3099 }
3100 
3101 #undef __FUNCT__
3102 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3103 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3104 {
3105   PetscErrorCode ierr;
3106   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3107 
3108   PetscFunctionBegin;
3109   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3110   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3111   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3112   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3113   PetscFunctionReturn(0);
3114 }
3115 
3116 /* -------------------------------------------------------------------*/
3117 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3118        MatGetRow_MPIAIJ,
3119        MatRestoreRow_MPIAIJ,
3120        MatMult_MPIAIJ,
3121 /* 4*/ MatMultAdd_MPIAIJ,
3122        MatMultTranspose_MPIAIJ,
3123        MatMultTransposeAdd_MPIAIJ,
3124 #ifdef PETSC_HAVE_PBGL
3125        MatSolve_MPIAIJ,
3126 #else
3127        0,
3128 #endif
3129        0,
3130        0,
3131 /*10*/ 0,
3132        0,
3133        0,
3134        MatSOR_MPIAIJ,
3135        MatTranspose_MPIAIJ,
3136 /*15*/ MatGetInfo_MPIAIJ,
3137        MatEqual_MPIAIJ,
3138        MatGetDiagonal_MPIAIJ,
3139        MatDiagonalScale_MPIAIJ,
3140        MatNorm_MPIAIJ,
3141 /*20*/ MatAssemblyBegin_MPIAIJ,
3142        MatAssemblyEnd_MPIAIJ,
3143        MatSetOption_MPIAIJ,
3144        MatZeroEntries_MPIAIJ,
3145 /*24*/ MatZeroRows_MPIAIJ,
3146        0,
3147 #ifdef PETSC_HAVE_PBGL
3148        0,
3149 #else
3150        0,
3151 #endif
3152        0,
3153        0,
3154 /*29*/ MatSetUp_MPIAIJ,
3155 #ifdef PETSC_HAVE_PBGL
3156        0,
3157 #else
3158        0,
3159 #endif
3160        0,
3161        0,
3162        0,
3163 /*34*/ MatDuplicate_MPIAIJ,
3164        0,
3165        0,
3166        0,
3167        0,
3168 /*39*/ MatAXPY_MPIAIJ,
3169        MatGetSubMatrices_MPIAIJ,
3170        MatIncreaseOverlap_MPIAIJ,
3171        MatGetValues_MPIAIJ,
3172        MatCopy_MPIAIJ,
3173 /*44*/ MatGetRowMax_MPIAIJ,
3174        MatScale_MPIAIJ,
3175        0,
3176        0,
3177        MatZeroRowsColumns_MPIAIJ,
3178 /*49*/ MatSetRandom_MPIAIJ,
3179        0,
3180        0,
3181        0,
3182        0,
3183 /*54*/ MatFDColoringCreate_MPIAIJ,
3184        0,
3185        MatSetUnfactored_MPIAIJ,
3186        MatPermute_MPIAIJ,
3187        0,
3188 /*59*/ MatGetSubMatrix_MPIAIJ,
3189        MatDestroy_MPIAIJ,
3190        MatView_MPIAIJ,
3191        0,
3192        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3193 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3194        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3195        0,
3196        0,
3197        0,
3198 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3199        MatGetRowMinAbs_MPIAIJ,
3200        0,
3201        MatSetColoring_MPIAIJ,
3202        0,
3203        MatSetValuesAdifor_MPIAIJ,
3204 /*75*/ MatFDColoringApply_AIJ,
3205        0,
3206        0,
3207        0,
3208        MatFindZeroDiagonals_MPIAIJ,
3209 /*80*/ 0,
3210        0,
3211        0,
3212 /*83*/ MatLoad_MPIAIJ,
3213        0,
3214        0,
3215        0,
3216        0,
3217        0,
3218 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3219        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3220        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3221        MatPtAP_MPIAIJ_MPIAIJ,
3222        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3223 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3224        0,
3225        0,
3226        0,
3227        0,
3228 /*99*/ 0,
3229        0,
3230        0,
3231        MatConjugate_MPIAIJ,
3232        0,
3233 /*104*/MatSetValuesRow_MPIAIJ,
3234        MatRealPart_MPIAIJ,
3235        MatImaginaryPart_MPIAIJ,
3236        0,
3237        0,
3238 /*109*/0,
3239        MatGetRedundantMatrix_MPIAIJ,
3240        MatGetRowMin_MPIAIJ,
3241        0,
3242        0,
3243 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3244        0,
3245        0,
3246        0,
3247        0,
3248 /*119*/0,
3249        0,
3250        0,
3251        0,
3252        MatGetMultiProcBlock_MPIAIJ,
3253 /*124*/MatFindNonzeroRows_MPIAIJ,
3254        MatGetColumnNorms_MPIAIJ,
3255        MatInvertBlockDiagonal_MPIAIJ,
3256        0,
3257        MatGetSubMatricesParallel_MPIAIJ,
3258 /*129*/0,
3259        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3260        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3261        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3262        0,
3263 /*134*/0,
3264        0,
3265        0,
3266        0,
3267        0
3268 };
3269 
3270 /* ----------------------------------------------------------------------------------------*/
3271 
3272 EXTERN_C_BEGIN
3273 #undef __FUNCT__
3274 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3275 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3276 {
3277   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3278   PetscErrorCode ierr;
3279 
3280   PetscFunctionBegin;
3281   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3282   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3283   PetscFunctionReturn(0);
3284 }
3285 EXTERN_C_END
3286 
3287 EXTERN_C_BEGIN
3288 #undef __FUNCT__
3289 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3290 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3291 {
3292   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3293   PetscErrorCode ierr;
3294 
3295   PetscFunctionBegin;
3296   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3297   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3298   PetscFunctionReturn(0);
3299 }
3300 EXTERN_C_END
3301 
3302 EXTERN_C_BEGIN
3303 #undef __FUNCT__
3304 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3305 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3306 {
3307   Mat_MPIAIJ     *b;
3308   PetscErrorCode ierr;
3309   PetscInt       i;
3310   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3311 
3312   PetscFunctionBegin;
3313   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3314   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3315   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3316   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3317   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3318   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3319 
3320   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3321   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3322   if (d_nnz) {
3323     for (i=0; i<B->rmap->n; i++) {
3324       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3325     }
3326   }
3327   if (o_nnz) {
3328     for (i=0; i<B->rmap->n; i++) {
3329       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3330     }
3331   }
3332   b = (Mat_MPIAIJ*)B->data;
3333 
3334   if (!B->preallocated) {
3335     /* Explicitly create 2 MATSEQAIJ matrices. */
3336     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3337     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3338     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3339     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3340     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3341     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3342     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3343     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3344     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3345     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3346   }
3347 
3348   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3349   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3350   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3351   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3352   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3353   B->preallocated = PETSC_TRUE;
3354   PetscFunctionReturn(0);
3355 }
3356 EXTERN_C_END
3357 
3358 #undef __FUNCT__
3359 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3360 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3361 {
3362   Mat            mat;
3363   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3364   PetscErrorCode ierr;
3365 
3366   PetscFunctionBegin;
3367   *newmat       = 0;
3368   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3369   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3370   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3371   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3372   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3373   a    = (Mat_MPIAIJ*)mat->data;
3374 
3375   mat->factortype    = matin->factortype;
3376   mat->rmap->bs      = matin->rmap->bs;
3377   mat->cmap->bs      = matin->cmap->bs;
3378   mat->assembled    = PETSC_TRUE;
3379   mat->insertmode   = NOT_SET_VALUES;
3380   mat->preallocated = PETSC_TRUE;
3381 
3382   a->size           = oldmat->size;
3383   a->rank           = oldmat->rank;
3384   a->donotstash     = oldmat->donotstash;
3385   a->roworiented    = oldmat->roworiented;
3386   a->rowindices     = 0;
3387   a->rowvalues      = 0;
3388   a->getrowactive   = PETSC_FALSE;
3389 
3390   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3391   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3392 
3393   if (oldmat->colmap) {
3394 #if defined (PETSC_USE_CTABLE)
3395     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3396 #else
3397     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3398     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3399     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3400 #endif
3401   } else a->colmap = 0;
3402   if (oldmat->garray) {
3403     PetscInt len;
3404     len  = oldmat->B->cmap->n;
3405     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3406     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3407     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3408   } else a->garray = 0;
3409 
3410   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3411   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3412   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3413   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3414   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3415   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3416   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3417   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3418   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3419   *newmat = mat;
3420   PetscFunctionReturn(0);
3421 }
3422 
3423 
3424 
3425 #undef __FUNCT__
3426 #define __FUNCT__ "MatLoad_MPIAIJ"
3427 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3428 {
3429   PetscScalar    *vals,*svals;
3430   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3431   PetscErrorCode ierr;
3432   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3433   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3434   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3435   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3436   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3437   int            fd;
3438   PetscInt       bs = 1;
3439 
3440   PetscFunctionBegin;
3441   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3442   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3443   if (!rank) {
3444     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3445     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3446     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3447   }
3448 
3449   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3450   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3451   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3452 
3453   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3454 
3455   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3456   M = header[1]; N = header[2];
3457   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3458   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3459   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3460 
3461   /* If global sizes are set, check if they are consistent with that given in the file */
3462   if (sizesset) {
3463     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3464   }
3465   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3466   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3467 
3468   /* determine ownership of all (block) rows */
3469   if ( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3470   if (newMat->rmap->n < 0 ) m    = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3471   else m = newMat->rmap->n; /* Set by user */
3472 
3473   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3474   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3475 
3476   /* First process needs enough room for process with most rows */
3477   if (!rank) {
3478     mmax = rowners[1];
3479     for (i=2; i<=size; i++) {
3480       mmax = PetscMax(mmax, rowners[i]);
3481     }
3482   } else mmax = m;
3483 
3484   rowners[0] = 0;
3485   for (i=2; i<=size; i++) {
3486     rowners[i] += rowners[i-1];
3487   }
3488   rstart = rowners[rank];
3489   rend   = rowners[rank+1];
3490 
3491   /* distribute row lengths to all processors */
3492   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3493   if (!rank) {
3494     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3495     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3496     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3497     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3498     for (j=0; j<m; j++) {
3499       procsnz[0] += ourlens[j];
3500     }
3501     for (i=1; i<size; i++) {
3502       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3503       /* calculate the number of nonzeros on each processor */
3504       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3505         procsnz[i] += rowlengths[j];
3506       }
3507       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3508     }
3509     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3510   } else {
3511     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3512   }
3513 
3514   if (!rank) {
3515     /* determine max buffer needed and allocate it */
3516     maxnz = 0;
3517     for (i=0; i<size; i++) {
3518       maxnz = PetscMax(maxnz,procsnz[i]);
3519     }
3520     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3521 
3522     /* read in my part of the matrix column indices  */
3523     nz   = procsnz[0];
3524     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3525     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3526 
3527     /* read in everyone else's and ship off */
3528     for (i=1; i<size; i++) {
3529       nz     = procsnz[i];
3530       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3531       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3532     }
3533     ierr = PetscFree(cols);CHKERRQ(ierr);
3534   } else {
3535     /* determine buffer space needed for message */
3536     nz = 0;
3537     for (i=0; i<m; i++) {
3538       nz += ourlens[i];
3539     }
3540     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3541 
3542     /* receive message of column indices*/
3543     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3544   }
3545 
3546   /* determine column ownership if matrix is not square */
3547   if (N != M) {
3548     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3549     else n = newMat->cmap->n;
3550     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3551     cstart = cend - n;
3552   } else {
3553     cstart = rstart;
3554     cend   = rend;
3555     n      = cend - cstart;
3556   }
3557 
3558   /* loop over local rows, determining number of off diagonal entries */
3559   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3560   jj = 0;
3561   for (i=0; i<m; i++) {
3562     for (j=0; j<ourlens[i]; j++) {
3563       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3564       jj++;
3565     }
3566   }
3567 
3568   for (i=0; i<m; i++) {
3569     ourlens[i] -= offlens[i];
3570   }
3571   if (!sizesset) {
3572     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3573   }
3574 
3575   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3576 
3577   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3578 
3579   for (i=0; i<m; i++) {
3580     ourlens[i] += offlens[i];
3581   }
3582 
3583   if (!rank) {
3584     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3585 
3586     /* read in my part of the matrix numerical values  */
3587     nz   = procsnz[0];
3588     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3589 
3590     /* insert into matrix */
3591     jj      = rstart;
3592     smycols = mycols;
3593     svals   = vals;
3594     for (i=0; i<m; i++) {
3595       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3596       smycols += ourlens[i];
3597       svals   += ourlens[i];
3598       jj++;
3599     }
3600 
3601     /* read in other processors and ship out */
3602     for (i=1; i<size; i++) {
3603       nz     = procsnz[i];
3604       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3605       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3606     }
3607     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3608   } else {
3609     /* receive numeric values */
3610     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3611 
3612     /* receive message of values*/
3613     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3614 
3615     /* insert into matrix */
3616     jj      = rstart;
3617     smycols = mycols;
3618     svals   = vals;
3619     for (i=0; i<m; i++) {
3620       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3621       smycols += ourlens[i];
3622       svals   += ourlens[i];
3623       jj++;
3624     }
3625   }
3626   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3627   ierr = PetscFree(vals);CHKERRQ(ierr);
3628   ierr = PetscFree(mycols);CHKERRQ(ierr);
3629   ierr = PetscFree(rowners);CHKERRQ(ierr);
3630   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3631   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3632 
3633   PetscFunctionReturn(0);
3634 }
3635 
3636 #undef __FUNCT__
3637 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3638 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3639 {
3640   PetscErrorCode ierr;
3641   IS             iscol_local;
3642   PetscInt       csize;
3643 
3644   PetscFunctionBegin;
3645   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3646   if (call == MAT_REUSE_MATRIX) {
3647     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3648     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3649   } else {
3650     PetscInt cbs;
3651     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3652     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3653     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3654   }
3655   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3656   if (call == MAT_INITIAL_MATRIX) {
3657     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3658     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3659   }
3660   PetscFunctionReturn(0);
3661 }
3662 
3663 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3664 #undef __FUNCT__
3665 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3666 /*
3667     Not great since it makes two copies of the submatrix: first a SeqAIJ on each
3668   process, and then the final result built by concatenating those local matrices.
3669   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3670 
3671   Note: This requires a sequential iscol with all indices.
3672 */
3673 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3674 {
3675   PetscErrorCode ierr;
3676   PetscMPIInt    rank,size;
3677   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3678   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3679   PetscBool      allcolumns, colflag;
3680   Mat            M,Mreuse;
3681   MatScalar      *vwork,*aa;
3682   MPI_Comm       comm = ((PetscObject)mat)->comm;
3683   Mat_SeqAIJ     *aij;
3684 
3685 
3686   PetscFunctionBegin;
3687   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3688   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3689 
3690   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3691   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3692   if (colflag && ncol == mat->cmap->N){
3693     allcolumns = PETSC_TRUE;
3694   } else {
3695     allcolumns = PETSC_FALSE;
3696   }
3697   if (call ==  MAT_REUSE_MATRIX) {
3698     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3699     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3700     ierr  = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3701   } else {
3702     ierr   = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3703   }
3704 
3705   /*
3706       m - number of local rows
3707       n - number of columns (same on all processors)
3708       rstart - first row in new global matrix generated
3709   */
3710   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3711   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3712   if (call == MAT_INITIAL_MATRIX) {
3713     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3714     ii  = aij->i;
3715     jj  = aij->j;
3716 
3717     /*
3718         Determine the number of non-zeros in the diagonal and off-diagonal
3719         portions of the matrix in order to do correct preallocation
3720     */
3721 
3722     /* first get start and end of "diagonal" columns */
3723     if (csize == PETSC_DECIDE) {
3724       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3725       if (mglobal == n) { /* square matrix */
3726 	nlocal = m;
3727       } else {
3728         nlocal = n/size + ((n % size) > rank);
3729       }
3730     } else {
3731       nlocal = csize;
3732     }
3733     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3734     rstart = rend - nlocal;
3735     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3736 
3737     /* next, compute all the lengths */
3738     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3739     olens = dlens + m;
3740     for (i=0; i<m; i++) {
3741       jend = ii[i+1] - ii[i];
3742       olen = 0;
3743       dlen = 0;
3744       for (j=0; j<jend; j++) {
3745         if (*jj < rstart || *jj >= rend) olen++;
3746         else dlen++;
3747         jj++;
3748       }
3749       olens[i] = olen;
3750       dlens[i] = dlen;
3751     }
3752     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3753     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3754     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3755     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3756     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3757     ierr = PetscFree(dlens);CHKERRQ(ierr);
3758   } else {
3759     PetscInt ml,nl;
3760 
3761     M = *newmat;
3762     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3763     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3764     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3765     /*
3766          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3767        rather than the slower MatSetValues().
3768     */
3769     M->was_assembled = PETSC_TRUE;
3770     M->assembled     = PETSC_FALSE;
3771   }
3772   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3773   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3774   ii  = aij->i;
3775   jj  = aij->j;
3776   aa  = aij->a;
3777   for (i=0; i<m; i++) {
3778     row   = rstart + i;
3779     nz    = ii[i+1] - ii[i];
3780     cwork = jj;     jj += nz;
3781     vwork = aa;     aa += nz;
3782     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3783   }
3784 
3785   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3786   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3787   *newmat = M;
3788 
3789   /* save submatrix used in processor for next request */
3790   if (call ==  MAT_INITIAL_MATRIX) {
3791     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3792     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3793   }
3794 
3795   PetscFunctionReturn(0);
3796 }
3797 
3798 EXTERN_C_BEGIN
3799 #undef __FUNCT__
3800 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3801 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3802 {
3803   PetscInt       m,cstart, cend,j,nnz,i,d;
3804   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3805   const PetscInt *JJ;
3806   PetscScalar    *values;
3807   PetscErrorCode ierr;
3808 
3809   PetscFunctionBegin;
3810   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3811 
3812   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3813   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3814   m      = B->rmap->n;
3815   cstart = B->cmap->rstart;
3816   cend   = B->cmap->rend;
3817   rstart = B->rmap->rstart;
3818 
3819   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3820 
3821 #if defined(PETSC_USE_DEBUG)
3822   for (i=0; i<m; i++) {
3823     nnz     = Ii[i+1]- Ii[i];
3824     JJ      = J + Ii[i];
3825     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3826     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3827     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3828   }
3829 #endif
3830 
3831   for (i=0; i<m; i++) {
3832     nnz     = Ii[i+1]- Ii[i];
3833     JJ      = J + Ii[i];
3834     nnz_max = PetscMax(nnz_max,nnz);
3835     d       = 0;
3836     for (j=0; j<nnz; j++) {
3837       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3838     }
3839     d_nnz[i] = d;
3840     o_nnz[i] = nnz - d;
3841   }
3842   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3843   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3844 
3845   if (v) values = (PetscScalar*)v;
3846   else {
3847     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3848     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3849   }
3850 
3851   for (i=0; i<m; i++) {
3852     ii   = i + rstart;
3853     nnz  = Ii[i+1]- Ii[i];
3854     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3855   }
3856   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3857   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3858 
3859   if (!v) {
3860     ierr = PetscFree(values);CHKERRQ(ierr);
3861   }
3862   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3863   PetscFunctionReturn(0);
3864 }
3865 EXTERN_C_END
3866 
3867 #undef __FUNCT__
3868 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3869 /*@
3870    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3871    (the default parallel PETSc format).
3872 
3873    Collective on MPI_Comm
3874 
3875    Input Parameters:
3876 +  B - the matrix
3877 .  i - the indices into j for the start of each local row (starts with zero)
3878 .  j - the column indices for each local row (starts with zero)
3879 -  v - optional values in the matrix
3880 
3881    Level: developer
3882 
3883    Notes:
3884        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3885      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3886      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3887 
3888        The i and j indices are 0 based, and the i entries are offsets into the local j (and v) arrays.
3889 
3890        The format which is used for the sparse matrix input, is equivalent to a
3891     row-major ordering.. i.e for the following matrix, the input data expected is
3892     as shown:
3893 
3894         1 0 0
3895         2 0 3     P0
3896        -------
3897         4 5 6     P1
3898 
3899      Process0 [P0]: rows_owned=[0,1]
3900         i =  {0,1,3}  [size = nrow+1  = 2+1]
3901         j =  {0,0,2}  [size = nz = 6]
3902         v =  {1,2,3}  [size = nz = 6]
3903 
3904      Process1 [P1]: rows_owned=[2]
3905         i =  {0,3}    [size = nrow+1  = 1+1]
3906         j =  {0,1,2}  [size = nz = 6]
3907         v =  {4,5,6}  [size = nz = 6]
3908 
3909 .keywords: matrix, aij, compressed row, sparse, parallel
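     A minimal calling sketch for P0 in the example above (B and ierr are assumed to
     be declared, and the matrix type and sizes set as usual for an MPIAIJ matrix;
     the array names pi, pj, pv are illustrative only):

.vb
        PetscInt    pi[] = {0,1,3};        /* row offsets for P0's two local rows  */
        PetscInt    pj[] = {0,0,2};        /* global column indices of P0's values */
        PetscScalar pv[] = {1.0,2.0,3.0};  /* the corresponding numerical values   */
        ierr = MatMPIAIJSetPreallocationCSR(B,pi,pj,pv);CHKERRQ(ierr);
.ve
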
3910 
3911 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3912           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3913 @*/
3914 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3915 {
3916   PetscErrorCode ierr;
3917 
3918   PetscFunctionBegin;
3919   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3920   PetscFunctionReturn(0);
3921 }
3922 
3923 #undef __FUNCT__
3924 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3925 /*@C
3926    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3927    (the default parallel PETSc format).  For good matrix assembly performance
3928    the user should preallocate the matrix storage by setting the parameters
3929    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3930    performance can be increased by more than a factor of 50.
3931 
3932    Collective on MPI_Comm
3933 
3934    Input Parameters:
3935 +  A - the matrix
3936 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3937            (same value is used for all local rows)
3938 .  d_nnz - array containing the number of nonzeros in the various rows of the
3939            DIAGONAL portion of the local submatrix (possibly different for each row)
3940            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3941            The size of this array is equal to the number of local rows, i.e 'm'.
3942            For matrices that will be factored, you must leave room for (and set)
3943            the diagonal entry even if it is zero.
3944 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3945            submatrix (same value is used for all local rows).
3946 -  o_nnz - array containing the number of nonzeros in the various rows of the
3947            OFF-DIAGONAL portion of the local submatrix (possibly different for
3948            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3949            structure. The size of this array is equal to the number
3950            of local rows, i.e 'm'.
3951 
3952    If the *_nnz parameter is given then the *_nz parameter is ignored
3953 
3954    The AIJ format (also called the Yale sparse matrix format or
3955    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3956    storage.  The stored row and column indices begin with zero.
3957    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3958 
3959    The parallel matrix is partitioned such that the first m0 rows belong to
3960    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3961    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3962 
3963    The DIAGONAL portion of the local submatrix of a processor can be defined
3964    as the submatrix which is obtained by extracting the part corresponding to
3965    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3966    first row that belongs to the processor, r2 is the last row belonging to
3967    this processor, and c1-c2 is the range of indices of the local part of a
3968    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3969    common case of a square matrix, the row and column ranges are the same and
3970    the DIAGONAL part is also square. The remaining portion of the local
3971    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3972 
3973    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3974 
3975    You can call MatGetInfo() to get information on how effective the preallocation was,
3976    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3977    You can also run with the option -info and look for messages with the string
3978    malloc in them to see if additional memory allocation was needed.
3979 
3980    Example usage:
3981 
3982    Consider the following 8x8 matrix with 34 non-zero values that is
3983    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3984    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3985    as follows:
3986 
3987 .vb
3988             1  2  0  |  0  3  0  |  0  4
3989     Proc0   0  5  6  |  7  0  0  |  8  0
3990             9  0 10  | 11  0  0  | 12  0
3991     -------------------------------------
3992            13  0 14  | 15 16 17  |  0  0
3993     Proc1   0 18  0  | 19 20 21  |  0  0
3994             0  0  0  | 22 23  0  | 24  0
3995     -------------------------------------
3996     Proc2  25 26 27  |  0  0 28  | 29  0
3997            30  0  0  | 31 32 33  |  0 34
3998 .ve
3999 
4000    This can be represented as a collection of submatrices as:
4001 
4002 .vb
4003       A B C
4004       D E F
4005       G H I
4006 .ve
4007 
4008    Where the submatrices A,B,C are owned by proc0, D,E,F are
4009    owned by proc1, G,H,I are owned by proc2.
4010 
4011    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4012    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4013    The 'M','N' parameters are 8,8, and have the same values on all procs.
4014 
4015    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4016    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4017    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4018    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4019    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
4020    matrix, and [DF] as another SeqAIJ matrix.
4021 
4022    When d_nz, o_nz parameters are specified, d_nz storage elements are
4023    allocated for every row of the local diagonal submatrix, and o_nz
4024    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4025    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4026    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4027    In this case, the values of d_nz,o_nz are:
4028 .vb
4029      proc0 : d_nz = 2, o_nz = 2
4030      proc1 : d_nz = 3, o_nz = 2
4031      proc2 : d_nz = 1, o_nz = 4
4032 .ve
4033    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4034    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4035    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4036    34 values.
4037 
4038    When the d_nnz, o_nnz parameters are specified, the storage is specified
4039    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4040    In the above case the values for d_nnz,o_nnz are:
4041 .vb
4042      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4043      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4044      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4045 .ve
4046    Here the space allocated is the sum of all the above values, i.e. 34, and
4047    hence the preallocation is perfect.
4048 
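   A minimal calling sketch (a sketch only: the ierr variable, PetscInitialize(), and the
   communicator are assumed to exist in the caller), written for a process that owns 3 rows
   of the example above; each process passes its own local sizes and nnz arrays:
.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2};   /* nonzeros per local row of the DIAGONAL block     */
     PetscInt o_nnz[3] = {2,2,2};   /* nonzeros per local row of the OFF-DIAGONAL block */

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), use A ... */
     ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve
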
4049    Level: intermediate
4050 
4051 .keywords: matrix, aij, compressed row, sparse, parallel
4052 
4053 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4054           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4055 @*/
4056 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4057 {
4058   PetscErrorCode ierr;
4059 
4060   PetscFunctionBegin;
4061   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4062   PetscValidType(B,1);
4063   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4064   PetscFunctionReturn(0);
4065 }
4066 
4067 #undef __FUNCT__
4068 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4069 /*@
4070      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4071          CSR format the local rows.
4072 
4073    Collective on MPI_Comm
4074 
4075    Input Parameters:
4076 +  comm - MPI communicator
4077 .  m - number of local rows (Cannot be PETSC_DECIDE)
4078 .  n - This value should be the same as the local size used in creating the
4079        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4080        calculated if N is given). For square matrices n is almost always m.
4081 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4082 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4083 .   i - row indices
4084 .   j - column indices
4085 -   a - matrix values
4086 
4087    Output Parameter:
4088 .   mat - the matrix
4089 
4090    Level: intermediate
4091 
4092    Notes:
4093        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4094      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4095      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4096 
4097        The i and j indices are 0 based; the i array holds the row offsets into the local j array.
4098 
4099        The format which is used for the sparse matrix input is equivalent to a
4100     row-major ordering, i.e. for the following matrix, the input data expected is
4101     as shown:
4102 
4103         1 0 0
4104         2 0 3     P0
4105        -------
4106         4 5 6     P1
4107 
4108      Process0 [P0]: rows_owned=[0,1]
4109         i =  {0,1,3}  [size = nrow+1  = 2+1]
4110         j =  {0,0,2}  [size = nz = 3]
4111         v =  {1,2,3}  [size = nz = 3]
4112 
4113      Process1 [P1]: rows_owned=[2]
4114         i =  {0,3}    [size = nrow+1  = 1+1]
4115         j =  {0,1,2}  [size = nz = 3]
4116         v =  {4,5,6}  [size = nz = 3]
4117 
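     A minimal calling sketch of the two-process example above (a sketch only: it assumes
     exactly two MPI processes and an ierr variable declared in the caller):
.vb
     Mat         A;
     PetscMPIInt rank;
     PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};  PetscScalar v0[] = {1,2,3};  /* P0 data */
     PetscInt    i1[] = {0,3},   j1[] = {0,1,2};  PetscScalar v1[] = {4,5,6};  /* P1 data */

     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     if (!rank) {
       ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i0,j0,v0,&A);CHKERRQ(ierr);
     } else {
       ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,1,PETSC_DECIDE,3,3,i1,j1,v1,&A);CHKERRQ(ierr);
     }
     ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve
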
4118 .keywords: matrix, aij, compressed row, sparse, parallel
4119 
4120 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4121           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4122 @*/
4123 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4124 {
4125   PetscErrorCode ierr;
4126 
4127   PetscFunctionBegin;
4128   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4129   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4130   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4131   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4132   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4133   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4134   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4135   PetscFunctionReturn(0);
4136 }
4137 
4138 #undef __FUNCT__
4139 #define __FUNCT__ "MatCreateAIJ"
4140 /*@C
4141    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4142    (the default parallel PETSc format).  For good matrix assembly performance
4143    the user should preallocate the matrix storage by setting the parameters
4144    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4145    performance can be increased by more than a factor of 50.
4146 
4147    Collective on MPI_Comm
4148 
4149    Input Parameters:
4150 +  comm - MPI communicator
4151 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4152            This value should be the same as the local size used in creating the
4153            y vector for the matrix-vector product y = Ax.
4154 .  n - This value should be the same as the local size used in creating the
4155        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4156        calculated if N is given). For square matrices n is almost always m.
4157 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4158 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4159 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4160            (same value is used for all local rows)
4161 .  d_nnz - array containing the number of nonzeros in the various rows of the
4162            DIAGONAL portion of the local submatrix (possibly different for each row)
4163            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4164            The size of this array is equal to the number of local rows, i.e 'm'.
4165 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4166            submatrix (same value is used for all local rows).
4167 -  o_nnz - array containing the number of nonzeros in the various rows of the
4168            OFF-DIAGONAL portion of the local submatrix (possibly different for
4169            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4170            structure. The size of this array is equal to the number
4171            of local rows, i.e 'm'.
4172 
4173    Output Parameter:
4174 .  A - the matrix
4175 
4176    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4177    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4178    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4179 
4180    Notes:
4181    If the *_nnz parameter is given then the *_nz parameter is ignored
4182 
4183    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4184    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4185    storage requirements for this matrix.
4186 
4187    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4188    processor then it must be used on all processors that share the object for
4189    that argument.
4190 
4191    The user MUST specify either the local or global matrix dimensions
4192    (possibly both).
4193 
4194    The parallel matrix is partitioned across processors such that the
4195    first m0 rows belong to process 0, the next m1 rows belong to
4196    process 1, the next m2 rows belong to process 2, etc., where
4197    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4198    values corresponding to an [m x N] submatrix.
4199 
4200    The columns are logically partitioned with the n0 columns belonging
4201    to the 0th partition, the next n1 columns belonging to the next
4202    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4203 
4204    The DIAGONAL portion of the local submatrix on any given processor
4205    is the submatrix formed by the rows and columns m,n owned by that
4206    processor, i.e. the diagonal submatrix on process 0 is [m0 x n0],
4207    the diagonal submatrix on process 1 is [m1 x n1], etc. The
4208    remaining portion of the local submatrix [m x (N-n)]
4209    constitutes the OFF-DIAGONAL portion. The example below better
4210    illustrates this concept.
4211 
4212    For a square global matrix we define each processor's diagonal portion
4213    to be its local rows and the corresponding columns (a square submatrix);
4214    each processor's off-diagonal portion encompasses the remainder of the
4215    local matrix (a rectangular submatrix).
4216 
4217    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4218 
4219    When calling this routine with a single process communicator, a matrix of
4220    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4221    type of communicator, use the construction mechanism:
4222      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4223 
4224    By default, this format uses inodes (identical nodes) when possible.
4225    We search for consecutive rows with the same nonzero structure, thereby
4226    reusing matrix information to achieve increased efficiency.
4227 
4228    Options Database Keys:
4229 +  -mat_no_inode  - Do not use inodes
4230 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4231 -  -mat_aij_oneindex - Internally use indexing starting at 1
4232         rather than 0.  Note that when calling MatSetValues(),
4233         the user still MUST index entries starting at 0!
4234 
4235 
4236    Example usage:
4237 
4238    Consider the following 8x8 matrix with 34 non-zero values that is
4239    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4240    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4241    as follows:
4242 
4243 .vb
4244             1  2  0  |  0  3  0  |  0  4
4245     Proc0   0  5  6  |  7  0  0  |  8  0
4246             9  0 10  | 11  0  0  | 12  0
4247     -------------------------------------
4248            13  0 14  | 15 16 17  |  0  0
4249     Proc1   0 18  0  | 19 20 21  |  0  0
4250             0  0  0  | 22 23  0  | 24  0
4251     -------------------------------------
4252     Proc2  25 26 27  |  0  0 28  | 29  0
4253            30  0  0  | 31 32 33  |  0 34
4254 .ve
4255 
4256    This can be represented as a collection of submatrices as:
4257 
4258 .vb
4259       A B C
4260       D E F
4261       G H I
4262 .ve
4263 
4264    Where the submatrices A,B,C are owned by proc0, D,E,F are
4265    owned by proc1, G,H,I are owned by proc2.
4266 
4267    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4268    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4269    The 'M','N' parameters are 8,8, and have the same values on all procs.
4270 
4271    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4272    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4273    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4274    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4275    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
4276    matrix, and [DF] as another SeqAIJ matrix.
4277 
4278    When d_nz, o_nz parameters are specified, d_nz storage elements are
4279    allocated for every row of the local diagonal submatrix, and o_nz
4280    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4281    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4282    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4283    In this case, the values of d_nz,o_nz are:
4284 .vb
4285      proc0 : d_nz = 2, o_nz = 2
4286      proc1 : d_nz = 3, o_nz = 2
4287      proc2 : d_nz = 1, o_nz = 4
4288 .ve
4289    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4290    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4291    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4292    34 values.
4293 
4294    When the d_nnz, o_nnz parameters are specified, the storage is specified
4295    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4296    In the above case the values for d_nnz,o_nnz are:
4297 .vb
4298      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4299      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4300      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4301 .ve
4302    Here the space allocated is the sum of all the above values, i.e. 34, and
4303    hence the preallocation is perfect.
4304 
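   A minimal calling sketch (a sketch only: ierr is assumed to be declared by the caller),
   written as proc0 of the example above would call it; each process passes its own m, n,
   d_nnz, and o_nnz:
.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};   /* proc0 counts from the example */

     ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), use A ... */
     ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve
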
4305    Level: intermediate
4306 
4307 .keywords: matrix, aij, compressed row, sparse, parallel
4308 
4309 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4310           MPIAIJ, MatCreateMPIAIJWithArrays()
4311 @*/
4312 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4313 {
4314   PetscErrorCode ierr;
4315   PetscMPIInt    size;
4316 
4317   PetscFunctionBegin;
4318   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4319   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4320   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4321   if (size > 1) {
4322     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4323     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4324   } else {
4325     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4326     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4327   }
4328   PetscFunctionReturn(0);
4329 }
4330 
4331 #undef __FUNCT__
4332 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
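/*
   MatMPIAIJGetSeqAIJ - returns the diagonal block Ad, the off-diagonal block Ao, and the
   garray map from local column numbers of Ao to global column numbers of the MPIAIJ matrix A
*/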
4333 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4334 {
4335   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4336 
4337   PetscFunctionBegin;
4338   *Ad     = a->A;
4339   *Ao     = a->B;
4340   *colmap = a->garray;
4341   PetscFunctionReturn(0);
4342 }
4343 
4344 #undef __FUNCT__
4345 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4346 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4347 {
4348   PetscErrorCode ierr;
4349   PetscInt       i;
4350   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4351 
4352   PetscFunctionBegin;
4353   if (coloring->ctype == IS_COLORING_GLOBAL) {
4354     ISColoringValue *allcolors,*colors;
4355     ISColoring      ocoloring;
4356 
4357     /* set coloring for diagonal portion */
4358     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4359 
4360     /* set coloring for off-diagonal portion */
4361     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4362     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4363     for (i=0; i<a->B->cmap->n; i++) {
4364       colors[i] = allcolors[a->garray[i]];
4365     }
4366     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4367     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4368     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4369     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4370   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4371     ISColoringValue *colors;
4372     PetscInt        *larray;
4373     ISColoring      ocoloring;
4374 
4375     /* set coloring for diagonal portion */
4376     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4377     for (i=0; i<a->A->cmap->n; i++) {
4378       larray[i] = i + A->cmap->rstart;
4379     }
4380     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4381     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4382     for (i=0; i<a->A->cmap->n; i++) {
4383       colors[i] = coloring->colors[larray[i]];
4384     }
4385     ierr = PetscFree(larray);CHKERRQ(ierr);
4386     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4387     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4388     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4389 
4390     /* set coloring for off-diagonal portion */
4391     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4392     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4393     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4394     for (i=0; i<a->B->cmap->n; i++) {
4395       colors[i] = coloring->colors[larray[i]];
4396     }
4397     ierr = PetscFree(larray);CHKERRQ(ierr);
4398     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4399     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4400     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4401   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4402 
4403   PetscFunctionReturn(0);
4404 }
4405 
4406 #undef __FUNCT__
4407 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4408 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4409 {
4410   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4411   PetscErrorCode ierr;
4412 
4413   PetscFunctionBegin;
4414   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4415   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4416   PetscFunctionReturn(0);
4417 }
4418 
4419 #undef __FUNCT__
4420 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4421 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4422 {
4423   PetscErrorCode ierr;
4424   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4425   PetscInt       *indx;
4426 
4427   PetscFunctionBegin;
4428   /* This routine will ONLY return MPIAIJ type matrix */
4429   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4430   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4431   if (n == PETSC_DECIDE){
4432     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4433   }
4434   /* Check sum(n) = N */
4435   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4436   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4437 
4438   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4439   rstart -= m;
4440 
4441   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4442   for (i=0;i<m;i++) {
4443     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4444     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4445     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4446   }
4447 
4448   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4449   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4450   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4451   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4452   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4453   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4454   PetscFunctionReturn(0);
4455 }
4456 
4457 #undef __FUNCT__
4458 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4459 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4460 {
4461   PetscErrorCode ierr;
4462   PetscInt       m,N,i,rstart,nnz,Ii;
4463   PetscInt       *indx;
4464   PetscScalar    *values;
4465 
4466   PetscFunctionBegin;
4467   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4468   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4469   for (i=0;i<m;i++) {
4470     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4471     Ii    = i + rstart;
4472     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4473     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4474   }
4475   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4476   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4477   PetscFunctionReturn(0);
4478 }
4479 
4480 #undef __FUNCT__
4481 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4482 /*@
4483       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4484                  matrices from each processor
4485 
4486     Collective on MPI_Comm
4487 
4488    Input Parameters:
4489 +    comm - the communicator the parallel matrix will live on
4490 .    inmat - the input sequential matrix (one per process)
4491 .    n - number of local columns (or PETSC_DECIDE)
4492 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4493 
4494    Output Parameter:
4495 .    outmat - the parallel matrix generated
4496 
4497     Level: advanced
4498 
4499    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4500 
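   A minimal usage sketch (a sketch only: each process is assumed to hold its own sequential
   matrix inseq with the same number of columns, and ierr is declared by the caller):
.vb
     Mat outmat;
     ierr = MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,inseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);
.ve
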
4501 @*/
4502 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4503 {
4504   PetscErrorCode ierr;
4505 
4506   PetscFunctionBegin;
4507   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4508   if (scall == MAT_INITIAL_MATRIX){
4509     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4510   }
4511   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4512   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4513   PetscFunctionReturn(0);
4514 }
4515 
4516 #undef __FUNCT__
4517 #define __FUNCT__ "MatFileSplit"
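/*
   MatFileSplit - writes this process's local rows of A (over all N global columns) as a
   sequential matrix to the binary file "<outfile>.<rank>"; a developer-level helper
*/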
4518 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4519 {
4520   PetscErrorCode    ierr;
4521   PetscMPIInt       rank;
4522   PetscInt          m,N,i,rstart,nnz;
4523   size_t            len;
4524   const PetscInt    *indx;
4525   PetscViewer       out;
4526   char              *name;
4527   Mat               B;
4528   const PetscScalar *values;
4529 
4530   PetscFunctionBegin;
4531   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4532   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4533   /* Should this be the type of the diagonal block of A? */
4534   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4535   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4536   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4537   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4538   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4539   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4540   for (i=0;i<m;i++) {
4541     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4542     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4543     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4544   }
4545   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4546   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4547 
4548   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4549   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4550   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4551   sprintf(name,"%s.%d",outfile,rank);
4552   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4553   ierr = PetscFree(name);CHKERRQ(ierr);
4554   ierr = MatView(B,out);CHKERRQ(ierr);
4555   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4556   ierr = MatDestroy(&B);CHKERRQ(ierr);
4557   PetscFunctionReturn(0);
4558 }
4559 
4560 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4561 #undef __FUNCT__
4562 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4563 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4564 {
4565   PetscErrorCode       ierr;
4566   Mat_Merge_SeqsToMPI  *merge;
4567   PetscContainer       container;
4568 
4569   PetscFunctionBegin;
4570   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4571   if (container) {
4572     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4573     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4574     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4575     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4576     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4577     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4578     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4579     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4580     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4581     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4582     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4583     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4584     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4585     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4586     ierr = PetscFree(merge);CHKERRQ(ierr);
4587     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4588   }
4589   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4590   PetscFunctionReturn(0);
4591 }
4592 
4593 #include <../src/mat/utils/freespace.h>
4594 #include <petscbt.h>
4595 
4596 #undef __FUNCT__
4597 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4598 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4599 {
4600   PetscErrorCode       ierr;
4601   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4602   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4603   PetscMPIInt          size,rank,taga,*len_s;
4604   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4605   PetscInt             proc,m;
4606   PetscInt             **buf_ri,**buf_rj;
4607   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4608   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4609   MPI_Request          *s_waits,*r_waits;
4610   MPI_Status           *status;
4611   MatScalar            *aa=a->a;
4612   MatScalar            **abuf_r,*ba_i;
4613   Mat_Merge_SeqsToMPI  *merge;
4614   PetscContainer       container;
4615 
4616   PetscFunctionBegin;
4617   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4618 
4619   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4620   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4621 
4622   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4623   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4624 
4625   bi     = merge->bi;
4626   bj     = merge->bj;
4627   buf_ri = merge->buf_ri;
4628   buf_rj = merge->buf_rj;
4629 
4630   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4631   owners = merge->rowmap->range;
4632   len_s  = merge->len_s;
4633 
4634   /* send and recv matrix values */
4635   /*-----------------------------*/
4636   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4637   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4638 
4639   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4640   for (proc=0,k=0; proc<size; proc++){
4641     if (!len_s[proc]) continue;
4642     i = owners[proc];
4643     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4644     k++;
4645   }
4646 
4647   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4648   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4649   ierr = PetscFree(status);CHKERRQ(ierr);
4650 
4651   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4652   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4653 
4654   /* insert mat values of mpimat */
4655   /*----------------------------*/
4656   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4657   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4658 
4659   for (k=0; k<merge->nrecv; k++){
4660     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4661     nrows = *(buf_ri_k[k]);
4662     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4663     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4664   }
4665 
4666   /* set values of ba */
4667   m = merge->rowmap->n;
4668   for (i=0; i<m; i++) {
4669     arow = owners[rank] + i;
4670     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4671     bnzi = bi[i+1] - bi[i];
4672     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4673 
4674     /* add local non-zero vals of this proc's seqmat into ba */
4675     anzi = ai[arow+1] - ai[arow];
4676     aj   = a->j + ai[arow];
4677     aa   = a->a + ai[arow];
4678     nextaj = 0;
4679     for (j=0; nextaj<anzi; j++){
4680       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4681         ba_i[j] += aa[nextaj++];
4682       }
4683     }
4684 
4685     /* add received vals into ba */
4686     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4687       /* i-th row */
4688       if (i == *nextrow[k]) {
4689         anzi = *(nextai[k]+1) - *nextai[k];
4690         aj   = buf_rj[k] + *(nextai[k]);
4691         aa   = abuf_r[k] + *(nextai[k]);
4692         nextaj = 0;
4693         for (j=0; nextaj<anzi; j++){
4694           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4695             ba_i[j] += aa[nextaj++];
4696           }
4697         }
4698         nextrow[k]++; nextai[k]++;
4699       }
4700     }
4701     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4702   }
4703   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4704   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4705 
4706   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4707   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4708   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4709   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4710   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4711   PetscFunctionReturn(0);
4712 }
4713 
4714 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4715 
4716 #undef __FUNCT__
4717 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4718 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4719 {
4720   PetscErrorCode       ierr;
4721   Mat                  B_mpi;
4722   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4723   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4724   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4725   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4726   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4727   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4728   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4729   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4730   MPI_Status           *status;
4731   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4732   PetscBT              lnkbt;
4733   Mat_Merge_SeqsToMPI  *merge;
4734   PetscContainer       container;
4735 
4736   PetscFunctionBegin;
4737   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4738 
4739   /* make sure it is a PETSc comm */
4740   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4741   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4742   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4743 
4744   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4745   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4746 
4747   /* determine row ownership */
4748   /*---------------------------------------------------------*/
4749   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4750   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4751   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4752   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4753   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4754   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4755   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4756 
4757   m      = merge->rowmap->n;
4758   owners = merge->rowmap->range;
4759 
4760   /* determine the number of messages to send, their lengths */
4761   /*---------------------------------------------------------*/
4762   len_s  = merge->len_s;
4763 
4764   len = 0;  /* length of buf_si[] */
4765   merge->nsend = 0;
4766   for (proc=0; proc<size; proc++){
4767     len_si[proc] = 0;
4768     if (proc == rank){
4769       len_s[proc] = 0;
4770     } else {
4771       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4772       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4773       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4774     if (len_s[proc]) {
4775       merge->nsend++;
4776       nrows = 0;
4777       for (i=owners[proc]; i<owners[proc+1]; i++){
4778         if (ai[i+1] > ai[i]) nrows++;
4779       }
4780       len_si[proc] = 2*(nrows+1);
4781       len += len_si[proc];
4782     }
4783   }
4784 
4785   /* determine the number and length of messages to receive for ij-structure */
4786   /*-------------------------------------------------------------------------*/
4787   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4788   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4789 
4790   /* post the Irecv of j-structure */
4791   /*-------------------------------*/
4792   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4793   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4794 
4795   /* post the Isend of j-structure */
4796   /*--------------------------------*/
4797   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4798 
4799   for (proc=0, k=0; proc<size; proc++){
4800     if (!len_s[proc]) continue;
4801     i = owners[proc];
4802     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4803     k++;
4804   }
4805 
4806   /* receives and sends of j-structure are complete */
4807   /*------------------------------------------------*/
4808   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4809   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4810 
4811   /* send and recv i-structure */
4812   /*---------------------------*/
4813   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4814   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4815 
4816   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4817   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4818   for (proc=0,k=0; proc<size; proc++){
4819     if (!len_s[proc]) continue;
4820     /* form outgoing message for i-structure:
4821          buf_si[0]:                 nrows to be sent
4822                [1:nrows]:           row index (global)
4823                [nrows+1:2*nrows+1]: i-structure index
4824     */
4825     /*-------------------------------------------*/
4826     nrows = len_si[proc]/2 - 1;
4827     buf_si_i    = buf_si + nrows+1;
4828     buf_si[0]   = nrows;
4829     buf_si_i[0] = 0;
4830     nrows = 0;
4831     for (i=owners[proc]; i<owners[proc+1]; i++){
4832       anzi = ai[i+1] - ai[i];
4833       if (anzi) {
4834         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4835         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4836         nrows++;
4837       }
4838     }
4839     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4840     k++;
4841     buf_si += len_si[proc];
4842   }
4843 
4844   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4845   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4846 
4847   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4848   for (i=0; i<merge->nrecv; i++){
4849     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4850   }
4851 
4852   ierr = PetscFree(len_si);CHKERRQ(ierr);
4853   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4854   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4855   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4856   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4857   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4858   ierr = PetscFree(status);CHKERRQ(ierr);
4859 
4860   /* compute a local seq matrix in each processor */
4861   /*----------------------------------------------*/
4862   /* allocate bi array and free space for accumulating nonzero column info */
4863   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4864   bi[0] = 0;
4865 
4866   /* create and initialize a linked list */
4867   nlnk = N+1;
4868   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4869 
4870   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4871   len  = ai[owners[rank+1]] - ai[owners[rank]];
4872   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4873   current_space = free_space;
4874 
4875   /* determine symbolic info for each local row */
4876   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4877 
4878   for (k=0; k<merge->nrecv; k++){
4879     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4880     nrows = *buf_ri_k[k];
4881     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4882     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4883     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4884 
4885   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4886   len = 0;
4887   for (i=0;i<m;i++) {
4888     bnzi   = 0;
4889     /* add local non-zero cols of this proc's seqmat into lnk */
4890     arow   = owners[rank] + i;
4891     anzi   = ai[arow+1] - ai[arow];
4892     aj     = a->j + ai[arow];
4893     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4894     bnzi += nlnk;
4895     /* add received col data into lnk */
4896     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4897       if (i == *nextrow[k]) { /* i-th row */
4898         anzi = *(nextai[k]+1) - *nextai[k];
4899         aj   = buf_rj[k] + *nextai[k];
4900         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4901         bnzi += nlnk;
4902         nextrow[k]++; nextai[k]++;
4903       }
4904     }
4905     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4906 
4907     /* if free space is not available, make more free space */
4908     if (current_space->local_remaining<bnzi) {
4909       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4910       nspacedouble++;
4911     }
4912     /* copy data into free space, then initialize lnk */
4913     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4914     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4915 
4916     current_space->array           += bnzi;
4917     current_space->local_used      += bnzi;
4918     current_space->local_remaining -= bnzi;
4919 
4920     bi[i+1] = bi[i] + bnzi;
4921   }
4922 
4923   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4924 
4925   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4926   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4927   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4928 
4929   /* create symbolic parallel matrix B_mpi */
4930   /*---------------------------------------*/
4931   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4932   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4933   if (n==PETSC_DECIDE) {
4934     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4935   } else {
4936     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4937   }
4938   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4939   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4940   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4941   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4942   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4943 
4944   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4945   B_mpi->assembled     = PETSC_FALSE;
4946   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4947   merge->bi            = bi;
4948   merge->bj            = bj;
4949   merge->buf_ri        = buf_ri;
4950   merge->buf_rj        = buf_rj;
4951   merge->coi           = PETSC_NULL;
4952   merge->coj           = PETSC_NULL;
4953   merge->owners_co     = PETSC_NULL;
4954 
4955   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4956 
4957   /* attach the supporting struct to B_mpi for reuse */
4958   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4959   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4960   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4961   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4962   *mpimat = B_mpi;
4963 
4964   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4965   PetscFunctionReturn(0);
4966 }
4967 
4968 #undef __FUNCT__
4969 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4970 /*@C
4971       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential
4972                  matrices from each processor
4973 
4974     Collective on MPI_Comm
4975 
4976    Input Parameters:
4977 +    comm - the communicator the parallel matrix will live on
4978 .    seqmat - the input sequential matrix (one per process)
4979 .    m - number of local rows (or PETSC_DECIDE)
4980 .    n - number of local columns (or PETSC_DECIDE)
4981 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4982 
4983    Output Parameter:
4984 .    mpimat - the parallel matrix generated
4985 
4986     Level: advanced
4987 
4988    Notes:
4989      The dimensions of the sequential matrix in each processor MUST be the same.
4990      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4991      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
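
     A minimal usage sketch (a sketch only: each process is assumed to hold its own sequential
     matrix seqmat with identical global dimensions, and ierr is declared by the caller):
.vb
     Mat mpimat;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
.ve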
4992 @*/
4993 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4994 {
4995   PetscErrorCode   ierr;
4996   PetscMPIInt     size;
4997 
4998   PetscFunctionBegin;
4999   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5000   if (size == 1){
5001      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5002      if (scall == MAT_INITIAL_MATRIX){
5003        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5004      } else {
5005        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5006      }
5007      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5008      PetscFunctionReturn(0);
5009   }
5010   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5011   if (scall == MAT_INITIAL_MATRIX){
5012     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5013   }
5014   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5015   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5016   PetscFunctionReturn(0);
5017 }
5018 
5019 #undef __FUNCT__
5020 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5021 /*@
5022      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5023           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5024           with MatGetSize()
5025 
5026     Not Collective
5027 
5028    Input Parameters:
5029 +    A - the matrix
5030 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5031 
5032    Output Parameter:
5033 .    A_loc - the local sequential matrix generated
5034 
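    A minimal usage sketch (a sketch only: A is assumed to be an assembled MPIAIJ matrix and
    ierr is declared by the caller):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc ... */
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
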
5035     Level: developer
5036 
5037 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5038 
5039 @*/
5040 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5041 {
5042   PetscErrorCode  ierr;
5043   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
5044   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5045   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5046   MatScalar       *aa=a->a,*ba=b->a,*cam;
5047   PetscScalar     *ca;
5048   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5049   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
5050   PetscBool       match;
5051 
5052   PetscFunctionBegin;
5053   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5054   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5055   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5056   if (scall == MAT_INITIAL_MATRIX){
5057     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5058     ci[0] = 0;
5059     for (i=0; i<am; i++){
5060       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5061     }
5062     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5063     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5064     k = 0;
5065     for (i=0; i<am; i++) {
5066       ncols_o = bi[i+1] - bi[i];
5067       ncols_d = ai[i+1] - ai[i];
5068       /* off-diagonal portion of A */
5069       for (jo=0; jo<ncols_o; jo++) {
5070         col = cmap[*bj];
5071         if (col >= cstart) break;
5072         cj[k]   = col; bj++;
5073         ca[k++] = *ba++;
5074       }
5075       /* diagonal portion of A */
5076       for (j=0; j<ncols_d; j++) {
5077         cj[k]   = cstart + *aj++;
5078         ca[k++] = *aa++;
5079       }
5080       /* off-diagonal portion of A */
5081       for (j=jo; j<ncols_o; j++) {
5082         cj[k]   = cmap[*bj++];
5083         ca[k++] = *ba++;
5084       }
5085     }
5086     /* put together the new matrix */
5087     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5088     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5089     /* Since these are PETSc arrays, change flags to free them as necessary. */
5090     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5091     mat->free_a  = PETSC_TRUE;
5092     mat->free_ij = PETSC_TRUE;
5093     mat->nonew   = 0;
5094   } else if (scall == MAT_REUSE_MATRIX){
5095     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5096     ci = mat->i; cj = mat->j; cam = mat->a;
5097     for (i=0; i<am; i++) {
5098       /* off-diagonal portion of A */
5099       ncols_o = bi[i+1] - bi[i];
5100       for (jo=0; jo<ncols_o; jo++) {
5101         col = cmap[*bj];
5102         if (col >= cstart) break;
5103         *cam++ = *ba++; bj++;
5104       }
5105       /* diagonal portion of A */
5106       ncols_d = ai[i+1] - ai[i];
5107       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5108       /* off-diagonal portion of A */
5109       for (j=jo; j<ncols_o; j++) {
5110         *cam++ = *ba++; bj++;
5111       }
5112     }
5113   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5114   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5115   PetscFunctionReturn(0);
5116 }
5117 
5118 #undef __FUNCT__
5119 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5120 /*@C
5121      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5122 
5123     Not Collective
5124 
5125    Input Parameters:
5126 +    A - the matrix
5127 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5128 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5129 
5130    Output Parameter:
5131 .    A_loc - the local sequential matrix generated
5132 
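    A minimal usage sketch (a sketch only: A is assumed to be an assembled MPIAIJ matrix;
    passing PETSC_NULL for row and col lets the routine select the local rows and nonzero
    columns itself):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
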
5133     Level: developer
5134 
5135 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5136 
5137 @*/
5138 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5139 {
5140   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5141   PetscErrorCode    ierr;
5142   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5143   IS                isrowa,iscola;
5144   Mat               *aloc;
5145   PetscBool       match;
5146 
5147   PetscFunctionBegin;
5148   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5149   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5150   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5151   if (!row){
5152     start = A->rmap->rstart; end = A->rmap->rend;
5153     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5154   } else {
5155     isrowa = *row;
5156   }
5157   if (!col){
5158     start = A->cmap->rstart;
5159     cmap  = a->garray;
5160     nzA   = a->A->cmap->n;
5161     nzB   = a->B->cmap->n;
5162     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5163     ncols = 0;
5164     for (i=0; i<nzB; i++) {
5165       if (cmap[i] < start) idx[ncols++] = cmap[i];
5166       else break;
5167     }
5168     imark = i;
5169     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5170     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5171     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5172   } else {
5173     iscola = *col;
5174   }
5175   if (scall != MAT_INITIAL_MATRIX){
5176     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5177     aloc[0] = *A_loc;
5178   }
5179   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5180   *A_loc = aloc[0];
5181   ierr = PetscFree(aloc);CHKERRQ(ierr);
5182   if (!row){
5183     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5184   }
5185   if (!col){
5186     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5187   }
5188   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5189   PetscFunctionReturn(0);
5190 }
5191 
5192 #undef __FUNCT__
5193 #define __FUNCT__ "MatGetBrowsOfAcols"
5194 /*@C
5195     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5196 
5197     Collective on Mat
5198 
5199    Input Parameters:
5200 +    A,B - the matrices in mpiaij format
5201 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5202 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5203 
5204    Output Parameter:
5205 +    rowb, colb - index sets of rows and columns of B to extract
5206 -    B_seq - the sequential matrix generated
5207 
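    A minimal usage sketch (a sketch only: A and B are assumed to be assembled MPIAIJ matrices
    with compatible layouts; with MAT_INITIAL_MATRIX the returned index sets belong to the
    caller and are destroyed here, though they could be kept for a later MAT_REUSE_MATRIX call):
.vb
     Mat B_seq;
     IS  rowb,colb;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     /* ... use B_seq ... */
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve
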
5208     Level: developer
5209 
5210 @*/
5211 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5212 {
5213   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5214   PetscErrorCode    ierr;
5215   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5216   IS                isrowb,iscolb;
5217   Mat               *bseq=PETSC_NULL;
5218 
5219   PetscFunctionBegin;
5220   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5221     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5222   }
5223   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5224 
5225   if (scall == MAT_INITIAL_MATRIX){
5226     start = A->cmap->rstart;
5227     cmap  = a->garray;
5228     nzA   = a->A->cmap->n;
5229     nzB   = a->B->cmap->n;
5230     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5231     ncols = 0;
5232     for (i=0; i<nzB; i++) {  /* row < local row index */
5233       if (cmap[i] < start) idx[ncols++] = cmap[i];
5234       else break;
5235     }
5236     imark = i;
5237     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5238     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5239     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5240     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5241   } else {
5242     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5243     isrowb = *rowb; iscolb = *colb;
5244     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5245     bseq[0] = *B_seq;
5246   }
5247   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5248   *B_seq = bseq[0];
5249   ierr = PetscFree(bseq);CHKERRQ(ierr);
5250   if (!rowb){
5251     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5252   } else {
5253     *rowb = isrowb;
5254   }
5255   if (!colb){
5256     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5257   } else {
5258     *colb = iscolb;
5259   }
5260   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5261   PetscFunctionReturn(0);
5262 }
5263 
5264 #undef __FUNCT__
5265 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5266 /*
5267     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5268     of the OFF-DIAGONAL portion of local A
5269 
5270     Collective on Mat
5271 
5272    Input Parameters:
5273 +    A,B - the matrices in mpiaij format
5274 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5275 
5276    Output Parameters:
5277 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5278 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5279 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5280 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5281 
5282     Level: developer
5283 
5284 */
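/*
   Typical calling sequence (a sketch; this is an internal routine, and the caller-side
   names startsj_s, startsj_r, bufa, and Both below are illustrative):

      PetscInt  *startsj_s = PETSC_NULL,*startsj_r = PETSC_NULL;
      MatScalar *bufa = PETSC_NULL;
      Mat       Both;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&Both);CHKERRQ(ierr);
      ....  B's numerical values change  ....
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&Both);CHKERRQ(ierr);
      ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
      ierr = MatDestroy(&Both);CHKERRQ(ierr);
*/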
5285 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5286 {
5287   VecScatter_MPI_General *gen_to,*gen_from;
5288   PetscErrorCode         ierr;
5289   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5290   Mat_SeqAIJ             *b_oth;
5291   VecScatter             ctx=a->Mvctx;
5292   MPI_Comm               comm=((PetscObject)ctx)->comm;
5293   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5294   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5295   PetscScalar            *rvalues,*svalues;
5296   MatScalar              *b_otha,*bufa,*bufA;
5297   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5298   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5299   MPI_Status             *sstatus,rstatus;
5300   PetscMPIInt            jj;
5301   PetscInt               *cols,sbs,rbs;
5302   PetscScalar            *vals;
5303 
5304   PetscFunctionBegin;
5305   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5306     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5307   }
5308   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5309   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5310 
5311   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5312   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5313   rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
5314   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
5315   nrecvs   = gen_from->n;
5316   nsends   = gen_to->n;
5317 
5318   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5319   srow     = gen_to->indices;   /* local row index to be sent */
5320   sstarts  = gen_to->starts;
5321   sprocs   = gen_to->procs;
5322   sstatus  = gen_to->sstatus;
5323   sbs      = gen_to->bs;
5324   rstarts  = gen_from->starts;
5325   rprocs   = gen_from->procs;
5326   rbs      = gen_from->bs;
5327 
5328   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5329   if (scall == MAT_INITIAL_MATRIX){
5330     /* i-array */
5331     /*---------*/
5332     /*  post receives */
5333     for (i=0; i<nrecvs; i++){
5334       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5335       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5336       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5337     }
5338 
5339     /* pack the outgoing message */
5340     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5341     sstartsj[0] = 0;  rstartsj[0] = 0;
5342     len = 0; /* total length of j or a array to be sent */
5343     k = 0;
5344     for (i=0; i<nsends; i++){
5345       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5346       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5347       for (j=0; j<nrows; j++) {
5348         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5349         for (l=0; l<sbs; l++){
5350           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5351           rowlen[j*sbs+l] = ncols;
5352           len += ncols;
5353           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5354         }
5355         k++;
5356       }
5357       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5358       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5359     }
5360     /* recvs and sends of i-array are completed */
5361     i = nrecvs;
5362     while (i--) {
5363       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5364     }
5365     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5366 
5367     /* allocate buffers for sending j and a arrays */
5368     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5369     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5370 
5371     /* create i-array of B_oth */
5372     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5373     b_othi[0] = 0;
5374     len = 0; /* total length of j or a array to be received */
5375     k = 0;
5376     for (i=0; i<nrecvs; i++){
5377       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5378       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5379       for (j=0; j<nrows; j++) {
5380         b_othi[k+1] = b_othi[k] + rowlen[j];
5381         len += rowlen[j]; k++;
5382       }
5383       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5384     }
5385 
5386     /* allocate space for j and a arrays of B_oth */
5387     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5388     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5389 
5390     /* j-array */
5391     /*---------*/
5392     /*  post receives of j-array */
5393     for (i=0; i<nrecvs; i++){
5394       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5395       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5396     }
5397 
5398     /* pack the outgoing message j-array */
5399     k = 0;
5400     for (i=0; i<nsends; i++){
5401       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5402       bufJ = bufj+sstartsj[i];
5403       for (j=0; j<nrows; j++) {
5404         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5405         for (ll=0; ll<sbs; ll++){
5406           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5407           for (l=0; l<ncols; l++){
5408             *bufJ++ = cols[l];
5409           }
5410           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5411         }
5412       }
5413       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5414     }
5415 
5416     /* recvs and sends of j-array are completed */
5417     i = nrecvs;
5418     while (i--) {
5419       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5420     }
5421     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5422   } else if (scall == MAT_REUSE_MATRIX){
5423     sstartsj = *startsj_s;
5424     rstartsj = *startsj_r;
5425     bufa     = *bufa_ptr;
5426     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5427     b_otha   = b_oth->a;
5428   } else {
5429     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatReuse value must be either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5430   }
5431 
5432   /* a-array */
5433   /*---------*/
5434   /*  post receives of a-array */
5435   for (i=0; i<nrecvs; i++){
5436     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5437     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5438   }
5439 
5440   /* pack the outgoing message a-array */
5441   k = 0;
5442   for (i=0; i<nsends; i++){
5443     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5444     bufA = bufa+sstartsj[i];
5445     for (j=0; j<nrows; j++) {
5446       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5447       for (ll=0; ll<sbs; ll++){
5448         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5449         for (l=0; l<ncols; l++){
5450           *bufA++ = vals[l];
5451         }
5452         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5453       }
5454     }
5455     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5456   }
5457   /* recvs and sends of a-array are completed */
5458   i = nrecvs;
5459   while (i--) {
5460     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5461   }
5462   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5463   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5464 
5465   if (scall == MAT_INITIAL_MATRIX){
5466     /* put together the new matrix */
5467     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5468 
5469     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5470     /* Since these are PETSc arrays, change flags to free them as necessary. */
5471     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5472     b_oth->free_a  = PETSC_TRUE;
5473     b_oth->free_ij = PETSC_TRUE;
5474     b_oth->nonew   = 0;
5475 
5476     ierr = PetscFree(bufj);CHKERRQ(ierr);
5477     if (!startsj_s || !bufa_ptr){
5478       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5479       ierr = PetscFree(bufa);CHKERRQ(ierr);
5480     } else {
5481       *startsj_s = sstartsj;
5482       *startsj_r = rstartsj;
5483       *bufa_ptr  = bufa;
5484     }
5485   }
5486   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5487   PetscFunctionReturn(0);
5488 }
5489 
5490 #undef __FUNCT__
5491 #define __FUNCT__ "MatGetCommunicationStructs"
5492 /*@C
5493   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5494 
5495   Not Collective
5496 
5497   Input Parameter:
5498 . A - The matrix in mpiaij format
5499 
5500   Output Parameters:
5501 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5502 . colmap - A map from global column index to local index into lvec
5503 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5504 
5505   Level: developer
5506 
5507 @*/
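/*
   Example (a sketch; assumes A has type MATMPIAIJ and that PETSc was configured
   without PETSC_USE_CTABLE, so colmap is a plain integer array):

      Vec        lvec;
      PetscInt   *colmap;
      VecScatter Mvctx;
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);

   The returned objects are owned by the matrix; the caller must not destroy them.
*/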
5508 #if defined (PETSC_USE_CTABLE)
5509 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5510 #else
5511 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5512 #endif
5513 {
5514   Mat_MPIAIJ *a;
5515 
5516   PetscFunctionBegin;
5517   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5518   PetscValidPointer(lvec, 2);
5519   PetscValidPointer(colmap, 3);
5520   PetscValidPointer(multScatter, 4);
5521   a = (Mat_MPIAIJ *) A->data;
5522   if (lvec) *lvec = a->lvec;
5523   if (colmap) *colmap = a->colmap;
5524   if (multScatter) *multScatter = a->Mvctx;
5525   PetscFunctionReturn(0);
5526 }
5527 
5528 EXTERN_C_BEGIN
5529 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5530 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5531 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5532 EXTERN_C_END
5533 
5534 #undef __FUNCT__
5535 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5536 /*
5537     Computes (B'*A')', which equals A*B, since computing the product A*B directly is untenable
5538 
5539                n                       p                          p
5540         (              )       (              )         (                  )
5541       m (      A       )  *  n (       B      )   =   m (         C        )
5542         (              )       (              )         (                  )
5543 
5544 */
5545 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5546 {
5547   PetscErrorCode     ierr;
5548   Mat                At,Bt,Ct;
5549 
5550   PetscFunctionBegin;
5551   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5552   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5553   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5554   ierr = MatDestroy(&At);CHKERRQ(ierr);
5555   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5556   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5557   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5558   PetscFunctionReturn(0);
5559 }
5560 
5561 #undef __FUNCT__
5562 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5563 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5564 {
5565   PetscErrorCode ierr;
5566   PetscInt       m=A->rmap->n,n=B->cmap->n;
5567   Mat            Cmat;
5568 
5569   PetscFunctionBegin;
5570   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5571   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5572   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5573   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5574   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5575   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5576   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5577   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5578 
5579   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5580   *C = Cmat;
5581   PetscFunctionReturn(0);
5582 }
5583 
5584 /* ----------------------------------------------------------------*/
5585 #undef __FUNCT__
5586 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5587 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5588 {
5589   PetscErrorCode ierr;
5590 
5591   PetscFunctionBegin;
5592   if (scall == MAT_INITIAL_MATRIX){
5593     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5594   }
5595   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5596   PetscFunctionReturn(0);
5597 }
5598 
5599 EXTERN_C_BEGIN
5600 #if defined(PETSC_HAVE_MUMPS)
5601 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5602 #endif
5603 #if defined(PETSC_HAVE_PASTIX)
5604 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5605 #endif
5606 #if defined(PETSC_HAVE_SUPERLU_DIST)
5607 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5608 #endif
5609 #if defined(PETSC_HAVE_CLIQUE)
5610 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5611 #endif
5612 EXTERN_C_END
5613 
5614 /*MC
5615    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5616 
5617    Options Database Keys:
5618 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5619 
5620   Level: beginner
5621 
5622 .seealso: MatCreateAIJ()
5623 M*/
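/*
   Typical creation sequence (a sketch; d_nz and o_nz are per-row preallocation counts
   chosen by the user):

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);

   or run with -mat_type mpiaij after a call to MatSetFromOptions().
*/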
5624 
5625 EXTERN_C_BEGIN
5626 #undef __FUNCT__
5627 #define __FUNCT__ "MatCreate_MPIAIJ"
5628 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5629 {
5630   Mat_MPIAIJ     *b;
5631   PetscErrorCode ierr;
5632   PetscMPIInt    size;
5633 
5634   PetscFunctionBegin;
5635   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5636   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5637   B->data         = (void*)b;
5638   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5639   B->assembled    = PETSC_FALSE;
5640   B->insertmode   = NOT_SET_VALUES;
5641   b->size         = size;
5642   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5643 
5644   /* build cache for off array entries formed */
5645   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5646   b->donotstash  = PETSC_FALSE;
5647   b->colmap      = 0;
5648   b->garray      = 0;
5649   b->roworiented = PETSC_TRUE;
5650 
5651   /* stuff used for matrix vector multiply */
5652   b->lvec      = PETSC_NULL;
5653   b->Mvctx     = PETSC_NULL;
5654 
5655   /* stuff for MatGetRow() */
5656   b->rowindices   = 0;
5657   b->rowvalues    = 0;
5658   b->getrowactive = PETSC_FALSE;
5659 
5660   /* flexible pointer used in CUSP/CUSPARSE classes */
5661   b->spptr        = PETSC_NULL;
5662 
5663 #if defined(PETSC_HAVE_MUMPS)
5664   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5665                                      "MatGetFactor_aij_mumps",
5666                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5667 #endif
5668 #if defined(PETSC_HAVE_PASTIX)
5669   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5670 					   "MatGetFactor_mpiaij_pastix",
5671 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5672 #endif
5673 #if defined(PETSC_HAVE_SUPERLU_DIST)
5674   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5675                                      "MatGetFactor_mpiaij_superlu_dist",
5676                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5677 #endif
5678 #if defined(PETSC_HAVE_CLIQUE)
5679   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C",
5680                                      "MatGetFactor_aij_clique",
5681                                      MatGetFactor_aij_clique);CHKERRQ(ierr);
5682 #endif
5683   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5684                                      "MatStoreValues_MPIAIJ",
5685                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5686   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5687                                      "MatRetrieveValues_MPIAIJ",
5688                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5689   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5690 				     "MatGetDiagonalBlock_MPIAIJ",
5691                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5692   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5693 				     "MatIsTranspose_MPIAIJ",
5694 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5695   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5696 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5697 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5698   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5699 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5700 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5701   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5702 				     "MatDiagonalScaleLocal_MPIAIJ",
5703 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5704   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5705                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5706                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5707   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5708                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5709                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5710   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5711                                      "MatConvert_MPIAIJ_MPISBAIJ",
5712                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5713   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5714                                      "MatMatMult_MPIDense_MPIAIJ",
5715                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5716   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5717                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5718                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5719   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5720                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5721                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5722   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5723   PetscFunctionReturn(0);
5724 }
5725 EXTERN_C_END
5726 
5727 #undef __FUNCT__
5728 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5729 /*@
5730      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5731          and "off-diagonal" part of the matrix in CSR format.
5732 
5733    Collective on MPI_Comm
5734 
5735    Input Parameters:
5736 +  comm - MPI communicator
5737 .  m - number of local rows (Cannot be PETSC_DECIDE)
5738 .  n - This value should be the same as the local size used in creating the
5739        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5740        calculated if N is given). For square matrices n is almost always m.
5741 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5742 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5743 .   i - row indices for "diagonal" portion of matrix
5744 .   j - column indices
5745 .   a - matrix values
5746 .   oi - row indices for "off-diagonal" portion of matrix
5747 .   oj - column indices
5748 -   oa - matrix values
5749 
5750    Output Parameter:
5751 .   mat - the matrix
5752 
5753    Level: advanced
5754 
5755    Notes:
5756        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5757        must free the arrays once the matrix has been destroyed and not before.
5758 
5759        The i and j indices are 0 based
5760 
5761        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5762 
5763        This sets local rows and cannot be used to set off-processor values.
5764 
5765        You cannot later use MatSetValues() to change values in this matrix.
5766 
5767 .keywords: matrix, aij, compressed row, sparse, parallel
5768 
5769 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5770           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5771 @*/
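/*
   A small sketch for the rank-0 piece of a 4 x 4 matrix run on two processes
   (each rank owns 2 rows and 2 columns; the arrays below are illustrative):

      PetscInt    i[]  = {0,1,2};       row offsets of the diagonal block
      PetscInt    j[]  = {0,1};         local column indices within the diagonal block
      PetscScalar a[]  = {1.0,2.0};
      PetscInt    oi[] = {0,1,1};       row offsets of the off-diagonal block
      PetscInt    oj[] = {2};           global column indices of the off-diagonal block
      PetscScalar oa[] = {3.0};
      Mat         A;
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);

   The arrays must stay valid until A is destroyed and are then freed by the caller.
*/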
5772 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5773 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5774 {
5775   PetscErrorCode ierr;
5776   Mat_MPIAIJ     *maij;
5777 
5778   PetscFunctionBegin;
5779   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5780   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5781   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5782   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5783   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5784   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5785   maij = (Mat_MPIAIJ*) (*mat)->data;
5786   maij->donotstash     = PETSC_TRUE;
5787   (*mat)->preallocated = PETSC_TRUE;
5788 
5789   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5790   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5791 
5792   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5793   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5794 
5795   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5796   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5797   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5798   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5799 
5800   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5801   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5802   PetscFunctionReturn(0);
5803 }
5804 
5805 /*
5806     Special version for direct calls from Fortran
5807 */
5808 #include <petsc-private/fortranimpl.h>
5809 
5810 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5811 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5812 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5813 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5814 #endif
5815 
5816 /* Change these macros so they can be used in a void function */
5817 #undef CHKERRQ
5818 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5819 #undef SETERRQ2
5820 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5821 #undef SETERRQ3
5822 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5823 #undef SETERRQ
5824 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5825 
5826 EXTERN_C_BEGIN
5827 #undef __FUNCT__
5828 #define __FUNCT__ "matsetvaluesmpiaij_"
5829 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5830 {
5831   Mat             mat = *mmat;
5832   PetscInt        m = *mm, n = *mn;
5833   InsertMode      addv = *maddv;
5834   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5835   PetscScalar     value;
5836   PetscErrorCode  ierr;
5837 
5838   MatCheckPreallocated(mat,1);
5839   if (mat->insertmode == NOT_SET_VALUES) {
5840     mat->insertmode = addv;
5841   }
5842 #if defined(PETSC_USE_DEBUG)
5843   else if (mat->insertmode != addv) {
5844     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5845   }
5846 #endif
5847   {
5848   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5849   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5850   PetscBool       roworiented = aij->roworiented;
5851 
5852   /* Some Variables required in the macro */
5853   Mat             A = aij->A;
5854   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5855   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5856   MatScalar       *aa = a->a;
5857   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5858   Mat             B = aij->B;
5859   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5860   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5861   MatScalar       *ba = b->a;
5862 
5863   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5864   PetscInt        nonew = a->nonew;
5865   MatScalar       *ap1,*ap2;
5866 
5867   PetscFunctionBegin;
5868   for (i=0; i<m; i++) {
5869     if (im[i] < 0) continue;
5870 #if defined(PETSC_USE_DEBUG)
5871     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5872 #endif
5873     if (im[i] >= rstart && im[i] < rend) {
5874       row      = im[i] - rstart;
5875       lastcol1 = -1;
5876       rp1      = aj + ai[row];
5877       ap1      = aa + ai[row];
5878       rmax1    = aimax[row];
5879       nrow1    = ailen[row];
5880       low1     = 0;
5881       high1    = nrow1;
5882       lastcol2 = -1;
5883       rp2      = bj + bi[row];
5884       ap2      = ba + bi[row];
5885       rmax2    = bimax[row];
5886       nrow2    = bilen[row];
5887       low2     = 0;
5888       high2    = nrow2;
5889 
5890       for (j=0; j<n; j++) {
5891         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5892         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5893         if (in[j] >= cstart && in[j] < cend){
5894           col = in[j] - cstart;
5895           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5896         } else if (in[j] < 0) continue;
5897 #if defined(PETSC_USE_DEBUG)
5898         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5899 #endif
5900         else {
5901           if (mat->was_assembled) {
5902             if (!aij->colmap) {
5903               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5904             }
5905 #if defined (PETSC_USE_CTABLE)
5906             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5907 	    col--;
5908 #else
5909             col = aij->colmap[in[j]] - 1;
5910 #endif
5911             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5912               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5913               col =  in[j];
5914               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5915               B = aij->B;
5916               b = (Mat_SeqAIJ*)B->data;
5917               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5918               rp2      = bj + bi[row];
5919               ap2      = ba + bi[row];
5920               rmax2    = bimax[row];
5921               nrow2    = bilen[row];
5922               low2     = 0;
5923               high2    = nrow2;
5924               bm       = aij->B->rmap->n;
5925               ba = b->a;
5926             }
5927           } else col = in[j];
5928           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5929         }
5930       }
5931     } else {
5932       if (!aij->donotstash) {
5933         if (roworiented) {
5934           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5935         } else {
5936           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5937         }
5938       }
5939     }
5940   }}
5941   PetscFunctionReturnVoid();
5942 }
5943 EXTERN_C_END
5944 
5945