xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e383bbd07f1c175fdb04eea9fdcaf8d9f9e92c0f)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
18    enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
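/*
   A minimal usage sketch (not part of the implementation in this file), following the
   recommendation above to call both preallocation routines; n, N, d_nnz and o_nnz are
   placeholders for the local/global sizes and the per-row nonzero counts:

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,0,d_nnz);CHKERRQ(ierr);          (used on a single-process communicator)
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);  (used on a multi-process communicator)
     ... calls to MatSetValues(A,...) ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/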
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
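/*
   A sketch of the usual way to obtain this type (A is assumed to be a matrix that has
   already been created and sized): either set it directly,

     ierr = MatSetType(A,MATAIJCRL);CHKERRQ(ierr);

   or call MatSetFromOptions(A) and run the program with -mat_type aijcrl.
*/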
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for (j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
107 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
108 {
109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
110   PetscErrorCode ierr;
111   PetscInt       i,rstart,nrows,*rows;
112 
113   PetscFunctionBegin;
114   *zrows = PETSC_NULL;
115   ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
116   ierr = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
117   for (i=0; i<nrows; i++) rows[i] += rstart;
118   ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 #undef __FUNCT__
123 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
124 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
125 {
126   PetscErrorCode ierr;
127   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
128   PetscInt       i,n,*garray = aij->garray;
129   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
130   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
131   PetscReal      *work;
132 
133   PetscFunctionBegin;
134   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
135   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
136   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
137   if (type == NORM_2) {
138     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
139       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
140     }
141     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
142       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
143     }
144   } else if (type == NORM_1) {
145     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
146       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
147     }
148     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
149       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
150     }
151   } else if (type == NORM_INFINITY) {
152     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
153       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
154     }
155     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
156       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
157     }
158 
159   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
160   if (type == NORM_INFINITY) {
161     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
162   } else {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
164   }
165   ierr = PetscFree(work);CHKERRQ(ierr);
166   if (type == NORM_2) {
167     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "MatDistribute_MPIAIJ"
174 /*
175     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
176     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
177 
178     Only for square matrices
179 */
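/*
   A usage sketch (assuming gmat is a square, assembled MATSEQAIJ that is significant
   only on process 0 of comm, and m is this process's share of the rows):

     Mat dist;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);   (moves over only the new numerical values)
*/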
180 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
181 {
182   PetscMPIInt    rank,size;
183   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
184   PetscErrorCode ierr;
185   Mat            mat;
186   Mat_SeqAIJ     *gmata;
187   PetscMPIInt    tag;
188   MPI_Status     status;
189   PetscBool      aij;
190   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
191 
192   PetscFunctionBegin;
193   CHKMEMQ;
194   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
195   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
196   if (!rank) {
197     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
198     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
199   }
200   if (reuse == MAT_INITIAL_MATRIX) {
201     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
202     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
203     if (!rank) {
204       bses[0] = gmat->rmap->bs;
205       bses[1] = gmat->cmap->bs;
206     }
207     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
208     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
209     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
210     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
211     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
212     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) {
215       rowners[i] += rowners[i-1];
216     }
217     rstart = rowners[rank];
218     rend   = rowners[rank+1];
219     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
220     if (!rank) {
221       gmata = (Mat_SeqAIJ*) gmat->data;
222       /* send row lengths to all processors */
223       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
224       for (i=1; i<size; i++) {
225 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
226       }
227       /* determine the number of diagonal and off-diagonal nonzeros in each row */
228       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
229       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
230       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
231       jj = 0;
232       for (i=0; i<m; i++) {
233 	for (j=0; j<dlens[i]; j++) {
234           if (gmata->j[jj] < rstart) ld[i]++;
235 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
236 	  jj++;
237 	}
238       }
239       /* send column indices to other processes */
240       for (i=1; i<size; i++) {
241 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
242 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
243 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
244       }
245 
246       /* send numerical values to other processes */
247       for (i=1; i<size; i++) {
248         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
249         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
250       }
251       gmataa = gmata->a;
252       gmataj = gmata->j;
253 
254     } else {
255       /* receive row lengths */
256       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
257       /* receive column indices */
258       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
260       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       /* determine the number of diagonal and off-diagonal nonzeros in each row */
262       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
263       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
264       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
265       jj = 0;
266       for (i=0; i<m; i++) {
267 	for (j=0; j<dlens[i]; j++) {
268           if (gmataj[jj] < rstart) ld[i]++;
269 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
270 	  jj++;
271 	}
272       }
273       /* receive numerical values */
274       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
275       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
276     }
277     /* set preallocation */
278     for (i=0; i<m; i++) {
279       dlens[i] -= olens[i];
280     }
281     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
282     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
283 
284     for (i=0; i<m; i++) {
285       dlens[i] += olens[i];
286     }
287     cnt  = 0;
288     for (i=0; i<m; i++) {
289       row  = rstart + i;
290       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
291       cnt += dlens[i];
292     }
293     if (rank) {
294       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
295     }
296     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
297     ierr = PetscFree(rowners);CHKERRQ(ierr);
298     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
299     *inmat = mat;
300   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
301     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
302     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
303     mat   = *inmat;
304     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
305     if (!rank) {
306       /* send numerical values to other processes */
307       gmata = (Mat_SeqAIJ*) gmat->data;
308       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
309       gmataa = gmata->a;
310       for (i=1; i<size; i++) {
311         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
312         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
313       }
314       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
315     } else {
316       /* receive numerical values from process 0*/
317       nz   = Ad->nz + Ao->nz;
318       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
319       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
320     }
321     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
322     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
323     ad = Ad->a;
324     ao = Ao->a;
325     if (mat->rmap->n) {
326       i  = 0;
327       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
328       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
329     }
330     for (i=1; i<mat->rmap->n; i++) {
331       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     i--;
335     if (mat->rmap->n) {
336       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
337     }
338     if (rank) {
339       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
340     }
341   }
342   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
344   CHKMEMQ;
345   PetscFunctionReturn(0);
346 }
347 
348 /*
349   Local utility routine that creates a mapping from the global column
350 number to the local number in the off-diagonal part of the local
351 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
352 a slightly higher hash table cost; without it, it is not scalable (each processor
353 holds an order-N integer array, but access to it is fast).
354 */
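/*
   Sketch of how the map is used later (for example in MatSetValues_MPIAIJ() below);
   gcol is a hypothetical global column index, and lcol ends up negative when the
   column is not present in this process's off-diagonal block:

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/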
355 #undef __FUNCT__
356 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
357 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
358 {
359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
360   PetscErrorCode ierr;
361   PetscInt       n = aij->B->cmap->n,i;
362 
363   PetscFunctionBegin;
364   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
365 #if defined (PETSC_USE_CTABLE)
366   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
367   for (i=0; i<n; i++){
368     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
369   }
370 #else
371   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
372   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
373   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
374   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
375 #endif
376   PetscFunctionReturn(0);
377 }
378 
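/*
  The two macros below insert (row,col,value) into the local diagonal block A and the
  off-diagonal block B respectively, using the row pointers rp1/ap1 (resp. rp2/ap2) that
  the caller has set up.  Each one narrows the search interval with a short bisection
  loop and then scans linearly; an existing entry is added to or overwritten, while a new
  entry (unless suppressed by the nonew or ignorezeroentries flags) triggers
  MatSeqXAIJReallocateAIJ() and a shift of the later entries in the row by one slot.
*/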
379 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
380 { \
381     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
382     lastcol1 = col;\
383     while (high1-low1 > 5) { \
384       t = (low1+high1)/2; \
385       if (rp1[t] > col) high1 = t; \
386       else             low1  = t; \
387     } \
388       for (_i=low1; _i<high1; _i++) { \
389         if (rp1[_i] > col) break; \
390         if (rp1[_i] == col) { \
391           if (addv == ADD_VALUES) ap1[_i] += value;   \
392           else                    ap1[_i] = value; \
393           goto a_noinsert; \
394         } \
395       }  \
396       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
397       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
398       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
399       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
400       N = nrow1++ - 1; a->nz++; high1++; \
401       /* shift up all the later entries in this row */ \
402       for (ii=N; ii>=_i; ii--) { \
403         rp1[ii+1] = rp1[ii]; \
404         ap1[ii+1] = ap1[ii]; \
405       } \
406       rp1[_i] = col;  \
407       ap1[_i] = value;  \
408       a_noinsert: ; \
409       ailen[row] = nrow1; \
410 }
411 
412 
413 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
414 { \
415     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
416     lastcol2 = col;\
417     while (high2-low2 > 5) { \
418       t = (low2+high2)/2; \
419       if (rp2[t] > col) high2 = t; \
420       else             low2  = t; \
421     } \
422     for (_i=low2; _i<high2; _i++) {		\
423       if (rp2[_i] > col) break;			\
424       if (rp2[_i] == col) {			      \
425 	if (addv == ADD_VALUES) ap2[_i] += value;     \
426 	else                    ap2[_i] = value;      \
427 	goto b_noinsert;			      \
428       }						      \
429     }							      \
430     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
431     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
432     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
433     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
434     N = nrow2++ - 1; b->nz++; high2++;					\
435     /* shift up all the later entries in this row */			\
436     for (ii=N; ii>=_i; ii--) {						\
437       rp2[ii+1] = rp2[ii];						\
438       ap2[ii+1] = ap2[ii];						\
439     }									\
440     rp2[_i] = col;							\
441     ap2[_i] = value;							\
442     b_noinsert: ;								\
443     bilen[row] = nrow2;							\
444 }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
449 {
450   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
451   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
452   PetscErrorCode ierr;
453   PetscInt       l,*garray = mat->garray,diag;
454 
455   PetscFunctionBegin;
456   /* code only works for square matrices A */
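  /* v[] must hold every stored entry of this global row in increasing global column
     order: the off-diagonal entries left of the diagonal block, then the diagonal-block
     entries, then the remaining off-diagonal entries */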
457 
458   /* find size of row to the left of the diagonal part */
459   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
460   row  = row - diag;
461   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
462     if (garray[b->j[b->i[row]+l]] > diag) break;
463   }
464   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
465 
466   /* diagonal part */
467   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
468 
469   /* right of diagonal part */
470   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatSetValues_MPIAIJ"
476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
477 {
478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
479   PetscScalar    value;
480   PetscErrorCode ierr;
481   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
482   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
483   PetscBool      roworiented = aij->roworiented;
484 
485   /* Some Variables required in the macro */
486   Mat            A = aij->A;
487   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
488   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
489   MatScalar      *aa = a->a;
490   PetscBool      ignorezeroentries = a->ignorezeroentries;
491   Mat            B = aij->B;
492   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
493   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
494   MatScalar      *ba = b->a;
495 
496   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
497   PetscInt       nonew;
498   MatScalar      *ap1,*ap2;
499 
500   PetscFunctionBegin;
501   if (v) PetscValidScalarPointer(v,6);
502   for (i=0; i<m; i++) {
503     if (im[i] < 0) continue;
504 #if defined(PETSC_USE_DEBUG)
505     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
506 #endif
507     if (im[i] >= rstart && im[i] < rend) {
508       row      = im[i] - rstart;
509       lastcol1 = -1;
510       rp1      = aj + ai[row];
511       ap1      = aa + ai[row];
512       rmax1    = aimax[row];
513       nrow1    = ailen[row];
514       low1     = 0;
515       high1    = nrow1;
516       lastcol2 = -1;
517       rp2      = bj + bi[row];
518       ap2      = ba + bi[row];
519       rmax2    = bimax[row];
520       nrow2    = bilen[row];
521       low2     = 0;
522       high2    = nrow2;
523 
524       for (j=0; j<n; j++) {
525         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
526         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
527         if (in[j] >= cstart && in[j] < cend){
528           col = in[j] - cstart;
529           nonew = a->nonew;
530           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
531         } else if (in[j] < 0) continue;
532 #if defined(PETSC_USE_DEBUG)
533         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
534 #endif
535         else {
536           if (mat->was_assembled) {
537             if (!aij->colmap) {
538               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
539             }
540 #if defined (PETSC_USE_CTABLE)
541             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
542 	    col--;
543 #else
544             col = aij->colmap[in[j]] - 1;
545 #endif
546             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
547               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
548               col =  in[j];
549               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
550               B = aij->B;
551               b = (Mat_SeqAIJ*)B->data;
552               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
553               rp2      = bj + bi[row];
554               ap2      = ba + bi[row];
555               rmax2    = bimax[row];
556               nrow2    = bilen[row];
557               low2     = 0;
558               high2    = nrow2;
559               bm       = aij->B->rmap->n;
560               ba = b->a;
561             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
562           } else col = in[j];
563           nonew = b->nonew;
564           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
565         }
566       }
567     } else {
568       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
569       if (!aij->donotstash) {
570         mat->assembled = PETSC_FALSE;
571         if (roworiented) {
572           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
573         } else {
574           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
575         }
576       }
577     }
578   }
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatGetValues_MPIAIJ"
584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
585 {
586   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
587   PetscErrorCode ierr;
588   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
589   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
590 
591   PetscFunctionBegin;
592   for (i=0; i<m; i++) {
593     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
594     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
595     if (idxm[i] >= rstart && idxm[i] < rend) {
596       row = idxm[i] - rstart;
597       for (j=0; j<n; j++) {
598         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
599         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
600         if (idxn[j] >= cstart && idxn[j] < cend){
601           col = idxn[j] - cstart;
602           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
603         } else {
604           if (!aij->colmap) {
605             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
606           }
607 #if defined (PETSC_USE_CTABLE)
608           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
609           col --;
610 #else
611           col = aij->colmap[idxn[j]] - 1;
612 #endif
613           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
614           else {
615             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
616           }
617         }
618       }
619     } else {
620       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
621     }
622   }
623   PetscFunctionReturn(0);
624 }
625 
626 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
627 
628 #undef __FUNCT__
629 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
630 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
631 {
632   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
633   PetscErrorCode ierr;
634   PetscInt       nstash,reallocs;
635   InsertMode     addv;
636 
637   PetscFunctionBegin;
638   if (aij->donotstash || mat->nooffprocentries) {
639     PetscFunctionReturn(0);
640   }
641 
642   /* make sure all processors are either in INSERTMODE or ADDMODE */
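  /* (INSERT_VALUES and ADD_VALUES are represented by distinct bits of the InsertMode enum,
     so the bitwise OR across processes equals ADD_VALUES|INSERT_VALUES exactly when the
     two modes have been mixed) */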
643   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
644   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
645   mat->insertmode = addv; /* in case this processor had no cache */
646 
647   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
648   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
649   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
650   PetscFunctionReturn(0);
651 }
652 
653 #undef __FUNCT__
654 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
655 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
656 {
657   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
658   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
659   PetscErrorCode ierr;
660   PetscMPIInt    n;
661   PetscInt       i,j,rstart,ncols,flg;
662   PetscInt       *row,*col;
663   PetscBool      other_disassembled;
664   PetscScalar    *val;
665   InsertMode     addv = mat->insertmode;
666 
667   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
668   PetscFunctionBegin;
669   if (!aij->donotstash && !mat->nooffprocentries) {
670     while (1) {
671       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
672       if (!flg) break;
673 
674       for (i=0; i<n;) {
675         /* Now identify the consecutive vals belonging to the same row */
676         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
677         if (j < n) ncols = j-i;
678         else       ncols = n-i;
679         /* Now assemble all these values with a single function call */
680         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
681         i = j;
682       }
683     }
684     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
685   }
686   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
687   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
688 
689   /* determine if any processor has disassembled; if so, we must
690      also disassemble ourselves, so that we may reassemble. */
691   /*
692      if nonzero structure of submatrix B cannot change then we know that
693      no processor disassembled thus we can skip this stuff
694   */
695   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
696     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
697     if (mat->was_assembled && !other_disassembled) {
698       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
699     }
700   }
701   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
702     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
703   }
704   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
705   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
706   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
707   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
708 
709   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
710   aij->rowvalues = 0;
711 
712   /* used by MatAXPY() */
713   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
714   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
715 
716   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
717   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
718   PetscFunctionReturn(0);
719 }
720 
721 #undef __FUNCT__
722 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
723 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
724 {
725   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
726   PetscErrorCode ierr;
727 
728   PetscFunctionBegin;
729   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
730   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
731   PetscFunctionReturn(0);
732 }
733 
734 #undef __FUNCT__
735 #define __FUNCT__ "MatZeroRows_MPIAIJ"
736 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
737 {
738   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
739   PetscErrorCode    ierr;
740   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
741   PetscInt          i,*owners = A->rmap->range;
742   PetscInt          *nprocs,j,idx,nsends,row;
743   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
744   PetscInt          *rvalues,count,base,slen,*source;
745   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
746   MPI_Comm          comm = ((PetscObject)A)->comm;
747   MPI_Request       *send_waits,*recv_waits;
748   MPI_Status        recv_status,*send_status;
749   const PetscScalar *xx;
750   PetscScalar       *bb;
751 #if defined(PETSC_DEBUG)
752   PetscBool      found = PETSC_FALSE;
753 #endif
754 
755   PetscFunctionBegin;
756   /*  first count number of contributors to each processor */
757   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
758   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
759   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
760   j = 0;
761   for (i=0; i<N; i++) {
762     if (lastidx > (idx = rows[i])) j = 0;
763     lastidx = idx;
764     for (; j<size; j++) {
765       if (idx >= owners[j] && idx < owners[j+1]) {
766         nprocs[2*j]++;
767         nprocs[2*j+1] = 1;
768         owner[i] = j;
769 #if defined(PETSC_DEBUG)
770         found = PETSC_TRUE;
771 #endif
772         break;
773       }
774     }
775 #if defined(PETSC_DEBUG)
776     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
777     found = PETSC_FALSE;
778 #endif
779   }
780   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
781 
782   if (A->nooffproczerorows) {
783     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
784     nrecvs = nsends;
785     nmax   = N;
786   } else {
787     /* inform other processors of number of messages and max length*/
788     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
789   }
790 
791   /* post receives:   */
792   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
793   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
794   for (i=0; i<nrecvs; i++) {
795     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
796   }
797 
798   /* do sends:
799       1) starts[i] gives the starting index in svalues for stuff going to
800          the ith processor
801   */
802   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
803   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
804   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
805   starts[0] = 0;
806   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
807   for (i=0; i<N; i++) {
808     svalues[starts[owner[i]]++] = rows[i];
809   }
810 
811   starts[0] = 0;
812   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
813   count = 0;
814   for (i=0; i<size; i++) {
815     if (nprocs[2*i+1]) {
816       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
817     }
818   }
819   ierr = PetscFree(starts);CHKERRQ(ierr);
820 
821   base = owners[rank];
822 
823   /*  wait on receives */
824   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
825   count  = nrecvs; slen = 0;
826   while (count) {
827     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
828     /* unpack receives into our local space */
829     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
830     source[imdex]  = recv_status.MPI_SOURCE;
831     lens[imdex]    = n;
832     slen          += n;
833     count--;
834   }
835   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
836 
837   /* move the data into the send scatter */
838   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
839   count = 0;
840   for (i=0; i<nrecvs; i++) {
841     values = rvalues + i*nmax;
842     for (j=0; j<lens[i]; j++) {
843       lrows[count++] = values[j] - base;
844     }
845   }
846   ierr = PetscFree(rvalues);CHKERRQ(ierr);
847   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
848   ierr = PetscFree(owner);CHKERRQ(ierr);
849   ierr = PetscFree(nprocs);CHKERRQ(ierr);
850 
851   /* fix right hand side if needed */
852   if (x && b) {
853     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
854     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
855     for (i=0; i<slen; i++) {
856       bb[lrows[i]] = diag*xx[lrows[i]];
857     }
858     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
859     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
860   }
861   /*
862         Zero the required rows. If the "diagonal block" of the matrix
863      is square and the user wishes to set the diagonal, we use separate
864      code so that MatSetValues() is not called for each diagonal entry,
865      which would allocate new memory, triggering many mallocs and slowing things down.
866 
867   */
868   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
869   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
870   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
871     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
872   } else if (diag != 0.0) {
873     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
874     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
875       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
876 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
877     }
878     for (i = 0; i < slen; i++) {
879       row  = lrows[i] + rstart;
880       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
881     }
882     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
883     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
884   } else {
885     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
886   }
887   ierr = PetscFree(lrows);CHKERRQ(ierr);
888 
889   /* wait on sends */
890   if (nsends) {
891     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
892     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
893     ierr = PetscFree(send_status);CHKERRQ(ierr);
894   }
895   ierr = PetscFree(send_waits);CHKERRQ(ierr);
896   ierr = PetscFree(svalues);CHKERRQ(ierr);
897   PetscFunctionReturn(0);
898 }
899 
900 #undef __FUNCT__
901 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
902 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
903 {
904   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
905   PetscErrorCode    ierr;
906   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
907   PetscInt          i,*owners = A->rmap->range;
908   PetscInt          *nprocs,j,idx,nsends;
909   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
910   PetscInt          *rvalues,count,base,slen,*source;
911   PetscInt          *lens,*lrows,*values,m;
912   MPI_Comm          comm = ((PetscObject)A)->comm;
913   MPI_Request       *send_waits,*recv_waits;
914   MPI_Status        recv_status,*send_status;
915   const PetscScalar *xx;
916   PetscScalar       *bb,*mask;
917   Vec               xmask,lmask;
918   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
919   const PetscInt    *aj, *ii,*ridx;
920   PetscScalar       *aa;
921 #if defined(PETSC_DEBUG)
922   PetscBool         found = PETSC_FALSE;
923 #endif
924 
925   PetscFunctionBegin;
926   /*  first count number of contributors to each processor */
927   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
928   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
929   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
930   j = 0;
931   for (i=0; i<N; i++) {
932     if (lastidx > (idx = rows[i])) j = 0;
933     lastidx = idx;
934     for (; j<size; j++) {
935       if (idx >= owners[j] && idx < owners[j+1]) {
936         nprocs[2*j]++;
937         nprocs[2*j+1] = 1;
938         owner[i] = j;
939 #if defined(PETSC_DEBUG)
940         found = PETSC_TRUE;
941 #endif
942         break;
943       }
944     }
945 #if defined(PETSC_DEBUG)
946     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
947     found = PETSC_FALSE;
948 #endif
949   }
950   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
951 
952   /* inform other processors of number of messages and max length*/
953   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
954 
955   /* post receives:   */
956   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
957   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
958   for (i=0; i<nrecvs; i++) {
959     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
960   }
961 
962   /* do sends:
963       1) starts[i] gives the starting index in svalues for stuff going to
964          the ith processor
965   */
966   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
967   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
968   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
969   starts[0] = 0;
970   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
971   for (i=0; i<N; i++) {
972     svalues[starts[owner[i]]++] = rows[i];
973   }
974 
975   starts[0] = 0;
976   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
977   count = 0;
978   for (i=0; i<size; i++) {
979     if (nprocs[2*i+1]) {
980       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
981     }
982   }
983   ierr = PetscFree(starts);CHKERRQ(ierr);
984 
985   base = owners[rank];
986 
987   /*  wait on receives */
988   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
989   count  = nrecvs; slen = 0;
990   while (count) {
991     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
992     /* unpack receives into our local space */
993     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
994     source[imdex]  = recv_status.MPI_SOURCE;
995     lens[imdex]    = n;
996     slen          += n;
997     count--;
998   }
999   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1000 
1001   /* move the data into the send scatter */
1002   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1003   count = 0;
1004   for (i=0; i<nrecvs; i++) {
1005     values = rvalues + i*nmax;
1006     for (j=0; j<lens[i]; j++) {
1007       lrows[count++] = values[j] - base;
1008     }
1009   }
1010   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1011   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1012   ierr = PetscFree(owner);CHKERRQ(ierr);
1013   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1014   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1015 
1016   /* zero diagonal part of matrix */
1017   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1018 
1019   /* handle off diagonal part of matrix */
1020   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
1021   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1022   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1023   for (i=0; i<slen; i++) {
1024     bb[lrows[i]] = 1;
1025   }
1026   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1027   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1028   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1030   if (x) {
1031     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1032     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1034     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1035   }
1036   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1037 
1038   /* remove zeroed rows of off diagonal matrix */
1039   ii = aij->i;
1040   for (i=0; i<slen; i++) {
1041     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1042   }
1043 
1044   /* loop over all elements of off process part of matrix zeroing removed columns*/
1045   if (aij->compressedrow.use){
1046     m    = aij->compressedrow.nrows;
1047     ii   = aij->compressedrow.i;
1048     ridx = aij->compressedrow.rindex;
1049     for (i=0; i<m; i++){
1050       n   = ii[i+1] - ii[i];
1051       aj  = aij->j + ii[i];
1052       aa  = aij->a + ii[i];
1053 
1054       for (j=0; j<n; j++) {
1055         if (PetscAbsScalar(mask[*aj])) {
1056           if (b) bb[*ridx] -= *aa*xx[*aj];
1057           *aa        = 0.0;
1058         }
1059         aa++;
1060         aj++;
1061       }
1062       ridx++;
1063     }
1064   } else { /* do not use compressed row format */
1065     m = l->B->rmap->n;
1066     for (i=0; i<m; i++) {
1067       n   = ii[i+1] - ii[i];
1068       aj  = aij->j + ii[i];
1069       aa  = aij->a + ii[i];
1070       for (j=0; j<n; j++) {
1071         if (PetscAbsScalar(mask[*aj])) {
1072           if (b) bb[i] -= *aa*xx[*aj];
1073           *aa    = 0.0;
1074         }
1075         aa++;
1076         aj++;
1077       }
1078     }
1079   }
1080   if (x) {
1081     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1082     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1083   }
1084   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1085   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1086   ierr = PetscFree(lrows);CHKERRQ(ierr);
1087 
1088   /* wait on sends */
1089   if (nsends) {
1090     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1091     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1092     ierr = PetscFree(send_status);CHKERRQ(ierr);
1093   }
1094   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1095   ierr = PetscFree(svalues);CHKERRQ(ierr);
1096 
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMult_MPIAIJ"
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1110   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151   PetscBool      merged;
1152 
1153   PetscFunctionBegin;
1154   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1155   /* do nondiagonal part */
1156   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1157   if (!merged) {
1158     /* send it on its way */
1159     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160     /* do local part */
1161     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1162     /* receive remote parts: note this assumes the values are not actually */
1163     /* added into yy until the next line */
1164     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165   } else {
1166     /* do local part */
1167     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1168     /* send it on its way */
1169     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1170     /* values actually were received in the Begin() but we need to call this nop */
1171     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 EXTERN_C_BEGIN
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1179 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1180 {
1181   MPI_Comm       comm;
1182   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1183   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1184   IS             Me,Notme;
1185   PetscErrorCode ierr;
1186   PetscInt       M,N,first,last,*notme,i;
1187   PetscMPIInt    size;
1188 
1189   PetscFunctionBegin;
1190 
1191   /* Easy test: the diagonal blocks must be transposes of each other */
1192   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1193   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1194   if (!*f) PetscFunctionReturn(0);
1195   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1196   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1197   if (size == 1) PetscFunctionReturn(0);
1198 
1199   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1200   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1201   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1202   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1203   for (i=0; i<first; i++) notme[i] = i;
1204   for (i=last; i<M; i++) notme[i-last+first] = i;
1205   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1206   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1207   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1208   Aoff = Aoffs[0];
1209   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1210   Boff = Boffs[0];
1211   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1212   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1213   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1214   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1215   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1216   ierr = PetscFree(notme);CHKERRQ(ierr);
1217   PetscFunctionReturn(0);
1218 }
1219 EXTERN_C_END
1220 
1221 #undef __FUNCT__
1222 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1223 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1224 {
1225   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1226   PetscErrorCode ierr;
1227 
1228   PetscFunctionBegin;
1229   /* do nondiagonal part */
1230   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1231   /* send it on its way */
1232   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1233   /* do local part */
1234   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1235   /* receive remote parts */
1236   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 /*
1241   This only works correctly for square matrices where the subblock A->A is the
1242    diagonal block
1243 */
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1246 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1247 {
1248   PetscErrorCode ierr;
1249   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1250 
1251   PetscFunctionBegin;
1252   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1253   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1254   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatScale_MPIAIJ"
1260 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1261 {
1262   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1263   PetscErrorCode ierr;
1264 
1265   PetscFunctionBegin;
1266   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1267   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #undef __FUNCT__
1272 #define __FUNCT__ "MatDestroy_MPIAIJ"
1273 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1274 {
1275   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1276   PetscErrorCode ierr;
1277 
1278   PetscFunctionBegin;
1279 #if defined(PETSC_USE_LOG)
1280   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1281 #endif
1282   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1283   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1284   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1285   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1286 #if defined (PETSC_USE_CTABLE)
1287   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1288 #else
1289   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1290 #endif
1291   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1292   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1293   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1294   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1295   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1296   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1297 
1298   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1312 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1313 {
1314   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1315   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1316   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1317   PetscErrorCode    ierr;
1318   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1319   int               fd;
1320   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1321   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1322   PetscScalar       *column_values;
1323   PetscInt          message_count,flowcontrolcount;
1324   FILE              *file;
1325 
1326   PetscFunctionBegin;
1327   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1328   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1329   nz   = A->nz + B->nz;
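  /* binary layout written below: a 4-entry header [MAT_FILE_CLASSID, global rows,
     global cols, global nonzero count], then all row lengths, then all column indices
     (in global numbering), then all numerical values; only process 0 writes to the
     file, the other processes ship their pieces to it in turn */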
1330   if (!rank) {
1331     header[0] = MAT_FILE_CLASSID;
1332     header[1] = mat->rmap->N;
1333     header[2] = mat->cmap->N;
1334     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1335     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1336     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1337     /* get largest number of rows any processor has */
1338     rlen = mat->rmap->n;
1339     range = mat->rmap->range;
1340     for (i=1; i<size; i++) {
1341       rlen = PetscMax(rlen,range[i+1] - range[i]);
1342     }
1343   } else {
1344     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1345     rlen = mat->rmap->n;
1346   }
1347 
1348   /* load up the local row counts */
1349   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1350   for (i=0; i<mat->rmap->n; i++) {
1351     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1352   }
1353 
1354   /* store the row lengths to the file */
1355   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1356   if (!rank) {
1357     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1358     for (i=1; i<size; i++) {
1359       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1360       rlen = range[i+1] - range[i];
1361       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1362       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1363     }
1364     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1365   } else {
1366     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1367     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1368     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1369   }
1370   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1371 
1372   /* load up the local column indices */
1373   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
1374   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1375   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1376   cnt  = 0;
1377   for (i=0; i<mat->rmap->n; i++) {
1378     for (j=B->i[i]; j<B->i[i+1]; j++) {
1379       if ( (col = garray[B->j[j]]) > cstart) break;
1380       column_indices[cnt++] = col;
1381     }
1382     for (k=A->i[i]; k<A->i[i+1]; k++) {
1383       column_indices[cnt++] = A->j[k] + cstart;
1384     }
1385     for (; j<B->i[i+1]; j++) {
1386       column_indices[cnt++] = garray[B->j[j]];
1387     }
1388   }
1389   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1390 
1391   /* store the column indices to the file */
1392   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1393   if (!rank) {
1394     MPI_Status status;
1395     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1396     for (i=1; i<size; i++) {
1397       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1398       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1399       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1400       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1401       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1402     }
1403     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1404   } else {
1405     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1406     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1407     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1408     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1409   }
1410   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1411 
1412   /* load up the local column values */
1413   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1414   cnt  = 0;
1415   for (i=0; i<mat->rmap->n; i++) {
1416     for (j=B->i[i]; j<B->i[i+1]; j++) {
1417       if ( garray[B->j[j]] > cstart) break;
1418       column_values[cnt++] = B->a[j];
1419     }
1420     for (k=A->i[i]; k<A->i[i+1]; k++) {
1421       column_values[cnt++] = A->a[k];
1422     }
1423     for (; j<B->i[i+1]; j++) {
1424       column_values[cnt++] = B->a[j];
1425     }
1426   }
1427   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1428 
1429   /* store the column values to the file */
1430   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1431   if (!rank) {
1432     MPI_Status status;
1433     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1434     for (i=1; i<size; i++) {
1435       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1436       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1437       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1438       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1439       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1440     }
1441     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1442   } else {
1443     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1444     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1445     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1446     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1447   }
1448   ierr = PetscFree(column_values);CHKERRQ(ierr);
1449 
1450   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1451   if (file) {
1452     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1453   }
1454 
1455   PetscFunctionReturn(0);
1456 }
1457 
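/*
   Usage sketch (not part of the original source): how the binary viewer path above is
   typically exercised, writing an MPIAIJ matrix A to a file and reading it back with
   MatLoad().  The file name "A.dat" and the communicator are placeholders.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);            // reaches MatView_MPIAIJ_Binary() on more than one process
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

     Mat B;
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(B,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/
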
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1460 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1461 {
1462   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1463   PetscErrorCode    ierr;
1464   PetscMPIInt       rank = aij->rank,size = aij->size;
1465   PetscBool         isdraw,iascii,isbinary;
1466   PetscViewer       sviewer;
1467   PetscViewerFormat format;
1468 
1469   PetscFunctionBegin;
1470   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1471   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1472   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1473   if (iascii) {
1474     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1475     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1476       MatInfo    info;
1477       PetscBool  inodes;
1478 
1479       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1480       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1481       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1482       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1483       if (!inodes) {
1484         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1485                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1486       } else {
1487         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1488                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1489       }
1490       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1491       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1492       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1493       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1494       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1495       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1496       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1497       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1498       PetscFunctionReturn(0);
1499     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1500       PetscInt   inodecount,inodelimit,*inodes;
1501       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1502       if (inodes) {
1503         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1504       } else {
1505         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1506       }
1507       PetscFunctionReturn(0);
1508     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1509       PetscFunctionReturn(0);
1510     }
1511   } else if (isbinary) {
1512     if (size == 1) {
1513       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1514       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1515     } else {
1516       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1517     }
1518     PetscFunctionReturn(0);
1519   } else if (isdraw) {
1520     PetscDraw  draw;
1521     PetscBool  isnull;
1522     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1523     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1524   }
1525 
1526   if (size == 1) {
1527     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1528     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1529   } else {
1530     /* assemble the entire matrix onto first processor. */
1531     Mat         A;
1532     Mat_SeqAIJ  *Aloc;
1533     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1534     MatScalar   *a;
1535 
1536     if (mat->rmap->N > 1024) {
1537       PetscBool  flg = PETSC_FALSE;
1538 
1539       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1540       if (!flg) {
1541         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1542       }
1543     }
1544 
1545     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1546     if (!rank) {
1547       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1548     } else {
1549       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1550     }
1551     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1552     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1553     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1554     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1555     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1556 
1557     /* copy over the A part */
1558     Aloc = (Mat_SeqAIJ*)aij->A->data;
1559     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1560     row = mat->rmap->rstart;
1561     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1562     for (i=0; i<m; i++) {
1563       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1564       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1565     }
1566     aj = Aloc->j;
1567     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1568 
1569     /* copy over the B part */
1570     Aloc = (Mat_SeqAIJ*)aij->B->data;
1571     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1572     row  = mat->rmap->rstart;
1573     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1574     ct   = cols;
1575     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1576     for (i=0; i<m; i++) {
1577       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1578       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1579     }
1580     ierr = PetscFree(ct);CHKERRQ(ierr);
1581     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1582     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1583     /*
1584        Everyone has to call to draw the matrix since the graphics waits are
1585        synchronized across all processors that share the PetscDraw object
1586     */
1587     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1588     if (!rank) {
1589       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1590       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1591       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1592       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1593     }
1594     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1595     ierr = MatDestroy(&A);CHKERRQ(ierr);
1596   }
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 #undef __FUNCT__
1601 #define __FUNCT__ "MatView_MPIAIJ"
1602 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1603 {
1604   PetscErrorCode ierr;
1605   PetscBool      iascii,isdraw,issocket,isbinary;
1606 
1607   PetscFunctionBegin;
1608   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1609   ierr  = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1610   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1611   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1612   if (iascii || isdraw || isbinary || issocket) {
1613     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1614   } else {
1615     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1616   }
1617   PetscFunctionReturn(0);
1618 }
1619 
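/*
   Usage sketch (not part of the original source): requesting the detailed ASCII output
   produced by MatView_MPIAIJ_ASCIIorDraworSocket() above.  PETSC_VIEWER_STDOUT_WORLD is
   assumed to live on the same communicator as the matrix A.

     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   Similar output is available from the command line (the exact option name varies
   between PETSc releases, e.g. -mat_view_info_detailed).
*/
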
1620 #undef __FUNCT__
1621 #define __FUNCT__ "MatSOR_MPIAIJ"
1622 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1623 {
1624   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1625   PetscErrorCode ierr;
1626   Vec            bb1 = 0;
1627   PetscBool      hasop;
1628 
1629   PetscFunctionBegin;
1630   if (flag == SOR_APPLY_UPPER) {
1631     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1632     PetscFunctionReturn(0);
1633   }
1634 
1635   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1636     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1637   }
1638 
1639   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1640     if (flag & SOR_ZERO_INITIAL_GUESS) {
1641       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1642       its--;
1643     }
1644 
1645     while (its--) {
1646       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1647       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1648 
1649       /* update rhs: bb1 = bb - B*x */
1650       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1651       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1652 
1653       /* local sweep */
1654       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1655     }
1656   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1657     if (flag & SOR_ZERO_INITIAL_GUESS) {
1658       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1659       its--;
1660     }
1661     while (its--) {
1662       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1663       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1664 
1665       /* update rhs: bb1 = bb - B*x */
1666       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1667       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1668 
1669       /* local sweep */
1670       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1671     }
1672   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1673     if (flag & SOR_ZERO_INITIAL_GUESS) {
1674       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1675       its--;
1676     }
1677     while (its--) {
1678       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1679       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1680 
1681       /* update rhs: bb1 = bb - B*x */
1682       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1683       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1684 
1685       /* local sweep */
1686       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1687     }
1688   }  else if (flag & SOR_EISENSTAT) {
1689     Vec         xx1;
1690 
1691     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1692     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1693 
1694     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1695     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1696     if (!mat->diag) {
1697       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1698       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1699     }
1700     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1701     if (hasop) {
1702       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1703     } else {
1704       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1705     }
1706     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1707 
1708     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1709 
1710     /* local sweep */
1711     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1712     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1713     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1714   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1715 
1716   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1717   PetscFunctionReturn(0);
1718 }
1719 
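/*
   Usage sketch (not part of the original source): one way to exercise MatSOR_MPIAIJ()
   directly.  As the code above enforces, only the processor-local sweeps (and Eisenstat)
   are supported in parallel.  A, b and x are assumed to be an assembled MPIAIJ matrix
   and conforming vectors.

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);

   More commonly this routine is reached through PCSOR, e.g. with -pc_type sor.
*/
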
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatPermute_MPIAIJ"
1722 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1723 {
1724   Mat            aA,aB,Aperm;
1725   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1726   PetscScalar    *aa,*ba;
1727   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1728   PetscSF        rowsf,sf;
1729   IS             parcolp = PETSC_NULL;
1730   PetscBool      done;
1731   PetscErrorCode ierr;
1732 
1733   PetscFunctionBegin;
1734   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1735   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1736   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1737   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1738 
1739   /* Invert row permutation to find out where my rows should go */
1740   ierr = PetscSFCreate(((PetscObject)A)->comm,&rowsf);CHKERRQ(ierr);
1741   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,PETSC_NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1742   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1743   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1744   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1745   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1746 
1747   /* Invert column permutation to find out where my columns should go */
1748   ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
1749   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,PETSC_NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1750   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1751   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1752   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1753   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1754   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1755 
1756   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1757   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1758   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1759 
1760   /* Find out where my gcols should go */
1761   ierr = MatGetSize(aB,PETSC_NULL,&ng);CHKERRQ(ierr);
1762   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1763   ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
1764   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,PETSC_NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1765   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1766   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1767   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1768   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1769 
1770   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1771   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1772   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1773   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1774   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1775   for (i=0; i<m; i++) {
1776     PetscInt row = rdest[i],rowner;
1777     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1778     for (j=ai[i]; j<ai[i+1]; j++) {
1779       PetscInt cowner,col = cdest[aj[j]];
1780       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1781       if (rowner == cowner) dnnz[i]++;
1782       else onnz[i]++;
1783     }
1784     for (j=bi[i]; j<bi[i+1]; j++) {
1785       PetscInt cowner,col = gcdest[bj[j]];
1786       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1787       if (rowner == cowner) dnnz[i]++;
1788       else onnz[i]++;
1789     }
1790   }
1791   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1792   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1793   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1794   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1795   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1796   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1797   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1798 
1799   ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1800   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1801   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1802   for (i=0; i<m; i++) {
1803     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1804     PetscInt rowlen;
1805     rowlen = ai[i+1] - ai[i];
1806     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1807     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1808     rowlen = bi[i+1] - bi[i];
1809     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1810     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1811   }
1812   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1813   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1814   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1815   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1816   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1817   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1818   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1819   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1820   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1821   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1822   *B = Aperm;
1823   PetscFunctionReturn(0);
1824 }
1825 
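/*
   Usage sketch (not part of the original source): applying a parallel permutation with
   MatPermute_MPIAIJ() above.  rowp and colp must be index sets distributed like the rows
   and columns of A; here MatGetOrdering() with the natural ordering (the ordering type
   generally available for parallel matrices) is used only as a convenient source of ISs.

     IS  rowp,colp;
     Mat Aperm;
     ierr = MatGetOrdering(A,MATORDERINGNATURAL,&rowp,&colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
     ierr = ISDestroy(&rowp);CHKERRQ(ierr);
     ierr = ISDestroy(&colp);CHKERRQ(ierr);
     ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
*/
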
1826 #undef __FUNCT__
1827 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1828 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1829 {
1830   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1831   Mat            A = mat->A,B = mat->B;
1832   PetscErrorCode ierr;
1833   PetscReal      isend[5],irecv[5];
1834 
1835   PetscFunctionBegin;
1836   info->block_size     = 1.0;
1837   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1838   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1839   isend[3] = info->memory;  isend[4] = info->mallocs;
1840   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1841   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1842   isend[3] += info->memory;  isend[4] += info->mallocs;
1843   if (flag == MAT_LOCAL) {
1844     info->nz_used      = isend[0];
1845     info->nz_allocated = isend[1];
1846     info->nz_unneeded  = isend[2];
1847     info->memory       = isend[3];
1848     info->mallocs      = isend[4];
1849   } else if (flag == MAT_GLOBAL_MAX) {
1850     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1851     info->nz_used      = irecv[0];
1852     info->nz_allocated = irecv[1];
1853     info->nz_unneeded  = irecv[2];
1854     info->memory       = irecv[3];
1855     info->mallocs      = irecv[4];
1856   } else if (flag == MAT_GLOBAL_SUM) {
1857     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1858     info->nz_used      = irecv[0];
1859     info->nz_allocated = irecv[1];
1860     info->nz_unneeded  = irecv[2];
1861     info->memory       = irecv[3];
1862     info->mallocs      = irecv[4];
1863   }
1864   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1865   info->fill_ratio_needed = 0;
1866   info->factor_mallocs    = 0;
1867   PetscFunctionReturn(0);
1868 }
1869 
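/*
   Usage sketch (not part of the original source): querying the statistics gathered by
   MatGetInfo_MPIAIJ() above, summed over all processes of the matrix A.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g, allocated %g, mallocs %g\n",
                        info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/
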
1870 #undef __FUNCT__
1871 #define __FUNCT__ "MatSetOption_MPIAIJ"
1872 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1873 {
1874   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1875   PetscErrorCode ierr;
1876 
1877   PetscFunctionBegin;
1878   switch (op) {
1879   case MAT_NEW_NONZERO_LOCATIONS:
1880   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1881   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1882   case MAT_KEEP_NONZERO_PATTERN:
1883   case MAT_NEW_NONZERO_LOCATION_ERR:
1884   case MAT_USE_INODES:
1885   case MAT_IGNORE_ZERO_ENTRIES:
1886     MatCheckPreallocated(A,1);
1887     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1888     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1889     break;
1890   case MAT_ROW_ORIENTED:
1891     a->roworiented = flg;
1892     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1893     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1894     break;
1895   case MAT_NEW_DIAGONALS:
1896     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1897     break;
1898   case MAT_IGNORE_OFF_PROC_ENTRIES:
1899     a->donotstash = flg;
1900     break;
1901   case MAT_SPD:
1902     A->spd_set                         = PETSC_TRUE;
1903     A->spd                             = flg;
1904     if (flg) {
1905       A->symmetric                     = PETSC_TRUE;
1906       A->structurally_symmetric        = PETSC_TRUE;
1907       A->symmetric_set                 = PETSC_TRUE;
1908       A->structurally_symmetric_set    = PETSC_TRUE;
1909     }
1910     break;
1911   case MAT_SYMMETRIC:
1912     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1913     break;
1914   case MAT_STRUCTURALLY_SYMMETRIC:
1915     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1916     break;
1917   case MAT_HERMITIAN:
1918     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1919     break;
1920   case MAT_SYMMETRY_ETERNAL:
1921     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1922     break;
1923   default:
1924     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1925   }
1926   PetscFunctionReturn(0);
1927 }
1928 
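/*
   Usage sketch (not part of the original source): options commonly set on an MPIAIJ
   matrix before assembly; both are dispatched through MatSetOption_MPIAIJ() above.

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);    // drop MatSetValues() aimed at other processes
     ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); // error instead of malloc on a new nonzero
*/
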
1929 #undef __FUNCT__
1930 #define __FUNCT__ "MatGetRow_MPIAIJ"
1931 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1932 {
1933   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1934   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1935   PetscErrorCode ierr;
1936   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1937   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1938   PetscInt       *cmap,*idx_p;
1939 
1940   PetscFunctionBegin;
1941   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1942   mat->getrowactive = PETSC_TRUE;
1943 
1944   if (!mat->rowvalues && (idx || v)) {
1945     /*
1946         allocate enough space to hold information from the longest row.
1947     */
1948     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1949     PetscInt   max = 1,tmp;
1950     for (i=0; i<matin->rmap->n; i++) {
1951       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1952       if (max < tmp) { max = tmp; }
1953     }
1954     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1955   }
1956 
1957   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1958   lrow = row - rstart;
1959 
1960   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1961   if (!v)   {pvA = 0; pvB = 0;}
1962   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1963   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1964   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1965   nztot = nzA + nzB;
1966 
1967   cmap  = mat->garray;
1968   if (v  || idx) {
1969     if (nztot) {
1970       /* Sort by increasing column numbers, assuming A and B already sorted */
1971       PetscInt imark = -1;
1972       if (v) {
1973         *v = v_p = mat->rowvalues;
1974         for (i=0; i<nzB; i++) {
1975           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1976           else break;
1977         }
1978         imark = i;
1979         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1980         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1981       }
1982       if (idx) {
1983         *idx = idx_p = mat->rowindices;
1984         if (imark > -1) {
1985           for (i=0; i<imark; i++) {
1986             idx_p[i] = cmap[cworkB[i]];
1987           }
1988         } else {
1989           for (i=0; i<nzB; i++) {
1990             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1991             else break;
1992           }
1993           imark = i;
1994         }
1995         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1996         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1997       }
1998     } else {
1999       if (idx) *idx = 0;
2000       if (v)   *v   = 0;
2001     }
2002   }
2003   *nz = nztot;
2004   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
2005   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
2011 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2012 {
2013   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2014 
2015   PetscFunctionBegin;
2016   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2017   aij->getrowactive = PETSC_FALSE;
2018   PetscFunctionReturn(0);
2019 }
2020 
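/*
   Usage sketch (not part of the original source): iterating over the locally owned rows
   with MatGetRow()/MatRestoreRow(), which land in the two routines above.  Only local
   rows may be requested, and each MatGetRow() must be matched by MatRestoreRow().

     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       // ... use ncols, cols[], vals[] ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/
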
2021 #undef __FUNCT__
2022 #define __FUNCT__ "MatNorm_MPIAIJ"
2023 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2024 {
2025   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2026   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2027   PetscErrorCode ierr;
2028   PetscInt       i,j,cstart = mat->cmap->rstart;
2029   PetscReal      sum = 0.0;
2030   MatScalar      *v;
2031 
2032   PetscFunctionBegin;
2033   if (aij->size == 1) {
2034     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2035   } else {
2036     if (type == NORM_FROBENIUS) {
2037       v = amat->a;
2038       for (i=0; i<amat->nz; i++) {
2039 #if defined(PETSC_USE_COMPLEX)
2040         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2041 #else
2042         sum += (*v)*(*v); v++;
2043 #endif
2044       }
2045       v = bmat->a;
2046       for (i=0; i<bmat->nz; i++) {
2047 #if defined(PETSC_USE_COMPLEX)
2048         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2049 #else
2050         sum += (*v)*(*v); v++;
2051 #endif
2052       }
2053       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2054       *norm = PetscSqrtReal(*norm);
2055     } else if (type == NORM_1) { /* max column norm */
2056       PetscReal *tmp,*tmp2;
2057       PetscInt  *jj,*garray = aij->garray;
2058       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2059       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2060       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2061       *norm = 0.0;
2062       v = amat->a; jj = amat->j;
2063       for (j=0; j<amat->nz; j++) {
2064         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
2065       }
2066       v = bmat->a; jj = bmat->j;
2067       for (j=0; j<bmat->nz; j++) {
2068         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2069       }
2070       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
2071       for (j=0; j<mat->cmap->N; j++) {
2072         if (tmp2[j] > *norm) *norm = tmp2[j];
2073       }
2074       ierr = PetscFree(tmp);CHKERRQ(ierr);
2075       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2076     } else if (type == NORM_INFINITY) { /* max row norm */
2077       PetscReal ntemp = 0.0;
2078       for (j=0; j<aij->A->rmap->n; j++) {
2079         v = amat->a + amat->i[j];
2080         sum = 0.0;
2081         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2082           sum += PetscAbsScalar(*v); v++;
2083         }
2084         v = bmat->a + bmat->i[j];
2085         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2086           sum += PetscAbsScalar(*v); v++;
2087         }
2088         if (sum > ntemp) ntemp = sum;
2089       }
2090       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2091     } else {
2092       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2093     }
2094   }
2095   PetscFunctionReturn(0);
2096 }
2097 
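/*
   Usage sketch (not part of the original source): the three norms computed by
   MatNorm_MPIAIJ() above; NORM_2 is not available for this matrix type.

     PetscReal nrmF,nrm1,nrmInf;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrmF);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_1,&nrm1);CHKERRQ(ierr);          // max column sum
     ierr = MatNorm(A,NORM_INFINITY,&nrmInf);CHKERRQ(ierr); // max row sum
*/
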
2098 #undef __FUNCT__
2099 #define __FUNCT__ "MatTranspose_MPIAIJ"
2100 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2101 {
2102   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2103   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2104   PetscErrorCode ierr;
2105   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2106   PetscInt       cstart=A->cmap->rstart,ncol;
2107   Mat            B;
2108   MatScalar      *array;
2109 
2110   PetscFunctionBegin;
2111   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2112 
2113   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2114   ai = Aloc->i; aj = Aloc->j;
2115   bi = Bloc->i; bj = Bloc->j;
2116   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2117     PetscInt *d_nnz,*g_nnz,*o_nnz;
2118     PetscSFNode *oloc;
2119     PETSC_UNUSED PetscSF sf;
2120 
2121     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2122     /* compute d_nnz for preallocation */
2123     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2124     for (i=0; i<ai[ma]; i++) {
2125       d_nnz[aj[i]]++;
2126       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2127     }
2128     /* compute local off-diagonal contributions */
2129     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2130     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2131     /* map those to global */
2132     ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
2133     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,PETSC_NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2134     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2135     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2136     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2137     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2138     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2139 
2140     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2141     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2142     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2143     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2144     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2145     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2146   } else {
2147     B = *matout;
2148     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2149     for (i=0; i<ai[ma]; i++){
2150       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2151     }
2152   }
2153 
2154   /* copy over the A part */
2155   array = Aloc->a;
2156   row = A->rmap->rstart;
2157   for (i=0; i<ma; i++) {
2158     ncol = ai[i+1]-ai[i];
2159     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2160     row++; array += ncol; aj += ncol;
2161   }
2162   aj = Aloc->j;
2163   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col indices */
2164 
2165   /* copy over the B part */
2166   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2167   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2168   array = Bloc->a;
2169   row = A->rmap->rstart;
2170   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2171   cols_tmp = cols;
2172   for (i=0; i<mb; i++) {
2173     ncol = bi[i+1]-bi[i];
2174     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2175     row++; array += ncol; cols_tmp += ncol;
2176   }
2177   ierr = PetscFree(cols);CHKERRQ(ierr);
2178 
2179   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2180   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2181   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2182     *matout = B;
2183   } else {
2184     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2185   }
2186   PetscFunctionReturn(0);
2187 }
2188 
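/*
   Usage sketch (not part of the original source): explicit and in-place transposes
   through MatTranspose_MPIAIJ() above.  As the routine checks, the in-place form is
   only allowed for square matrices.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);  // new matrix holding A^T
     ierr = MatDestroy(&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);     // in place, square A only
*/
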
2189 #undef __FUNCT__
2190 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2191 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2192 {
2193   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2194   Mat            a = aij->A,b = aij->B;
2195   PetscErrorCode ierr;
2196   PetscInt       s1,s2,s3;
2197 
2198   PetscFunctionBegin;
2199   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2200   if (rr) {
2201     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2202     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2203     /* Overlap communication with computation. */
2204     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2205   }
2206   if (ll) {
2207     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2208     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2209     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2210   }
2211   /* scale the diagonal block */
2212   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2213 
2214   if (rr) {
2215     /* Do a scatter end and then right scale the off-diagonal block */
2216     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2217     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2218   }
2219 
2220   PetscFunctionReturn(0);
2221 }
2222 
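/*
   Usage sketch (not part of the original source): scaling A as diag(l)*A*diag(r) with
   the routine above.  l must share the row layout of A and r the column layout; either
   argument may be PETSC_NULL to skip that side.

     Vec l,r;
     ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);   // r conforms to columns, l to rows
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = VecDestroy(&l);CHKERRQ(ierr);
     ierr = VecDestroy(&r);CHKERRQ(ierr);
*/
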
2223 #undef __FUNCT__
2224 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2225 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2226 {
2227   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2228   PetscErrorCode ierr;
2229 
2230   PetscFunctionBegin;
2231   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2232   PetscFunctionReturn(0);
2233 }
2234 
2235 #undef __FUNCT__
2236 #define __FUNCT__ "MatEqual_MPIAIJ"
2237 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2238 {
2239   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2240   Mat            a,b,c,d;
2241   PetscBool      flg;
2242   PetscErrorCode ierr;
2243 
2244   PetscFunctionBegin;
2245   a = matA->A; b = matA->B;
2246   c = matB->A; d = matB->B;
2247 
2248   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2249   if (flg) {
2250     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2251   }
2252   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2253   PetscFunctionReturn(0);
2254 }
2255 
2256 #undef __FUNCT__
2257 #define __FUNCT__ "MatCopy_MPIAIJ"
2258 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2259 {
2260   PetscErrorCode ierr;
2261   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2262   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2263 
2264   PetscFunctionBegin;
2265   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2266   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2267     /* because of the column compression in the off-processor part of the matrix a->B,
2268        the number of columns in a->B and b->B may be different, hence we cannot call
2269        the MatCopy() directly on the two parts. If need be, we can provide a more
2270        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2271        then copying the submatrices */
2272     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2273   } else {
2274     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2275     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2276   }
2277   PetscFunctionReturn(0);
2278 }
2279 
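/*
   Usage sketch (not part of the original source): copying into a structurally identical
   matrix, the fast path of MatCopy_MPIAIJ() above; with DIFFERENT_NONZERO_PATTERN the
   generic MatCopy_Basic() path is taken instead.

     Mat B;
     ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = MatDestroy(&B);CHKERRQ(ierr);
*/
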
2280 #undef __FUNCT__
2281 #define __FUNCT__ "MatSetUp_MPIAIJ"
2282 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2283 {
2284   PetscErrorCode ierr;
2285 
2286   PetscFunctionBegin;
2287   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 #undef __FUNCT__
2292 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2293 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2294 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2295 {
2296   PetscInt          i,m=Y->rmap->N;
2297   Mat_SeqAIJ        *x = (Mat_SeqAIJ*)X->data;
2298   Mat_SeqAIJ        *y = (Mat_SeqAIJ*)Y->data;
2299   const PetscInt    *xi = x->i,*yi = y->i;
2300 
2301   PetscFunctionBegin;
2302   /* Set the number of nonzeros in the new matrix */
2303   for (i=0; i<m; i++) {
2304     PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2305     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2306     nnz[i] = 0;
2307     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2308       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2309       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2310       nnz[i]++;
2311     }
2312     for (; k<nzy; k++) nnz[i]++;
2313   }
2314   PetscFunctionReturn(0);
2315 }
2316 
2317 #undef __FUNCT__
2318 #define __FUNCT__ "MatAXPY_MPIAIJ"
2319 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2320 {
2321   PetscErrorCode ierr;
2322   PetscInt       i;
2323   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2324   PetscBLASInt   bnz,one=1;
2325   Mat_SeqAIJ     *x,*y;
2326 
2327   PetscFunctionBegin;
2328   if (str == SAME_NONZERO_PATTERN) {
2329     PetscScalar alpha = a;
2330     x = (Mat_SeqAIJ *)xx->A->data;
2331     y = (Mat_SeqAIJ *)yy->A->data;
2332     bnz = PetscBLASIntCast(x->nz);
2333     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2334     x = (Mat_SeqAIJ *)xx->B->data;
2335     y = (Mat_SeqAIJ *)yy->B->data;
2336     bnz = PetscBLASIntCast(x->nz);
2337     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2338   } else if (str == SUBSET_NONZERO_PATTERN) {
2339     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2340 
2341     x = (Mat_SeqAIJ *)xx->B->data;
2342     y = (Mat_SeqAIJ *)yy->B->data;
2343     if (y->xtoy && y->XtoY != xx->B) {
2344       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2345       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2346     }
2347     if (!y->xtoy) { /* get xtoy */
2348       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2349       y->XtoY = xx->B;
2350       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2351     }
2352     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2353   } else {
2354     Mat B;
2355     PetscInt *nnz_d,*nnz_o;
2356     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2357     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2358     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2359     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2360     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2361     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2362     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2363     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2364     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2365     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2366     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2367     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2368     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2369     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2370   }
2371   PetscFunctionReturn(0);
2372 }
2373 
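/*
   Usage sketch (not part of the original source): the three cases of MatAXPY_MPIAIJ()
   above, selected by the MatStructure argument; in every case Y <- Y + a*X.

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);      // straight BLAS axpy on the stored values
     ierr = MatAXPY(Y,2.0,X,SUBSET_NONZERO_PATTERN);CHKERRQ(ierr);    // pattern of X contained in pattern of Y
     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); // Y is rebuilt with the union pattern
*/
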
2374 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2375 
2376 #undef __FUNCT__
2377 #define __FUNCT__ "MatConjugate_MPIAIJ"
2378 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2379 {
2380 #if defined(PETSC_USE_COMPLEX)
2381   PetscErrorCode ierr;
2382   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2383 
2384   PetscFunctionBegin;
2385   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2386   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2387 #else
2388   PetscFunctionBegin;
2389 #endif
2390   PetscFunctionReturn(0);
2391 }
2392 
2393 #undef __FUNCT__
2394 #define __FUNCT__ "MatRealPart_MPIAIJ"
2395 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2396 {
2397   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2398   PetscErrorCode ierr;
2399 
2400   PetscFunctionBegin;
2401   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2402   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2403   PetscFunctionReturn(0);
2404 }
2405 
2406 #undef __FUNCT__
2407 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2408 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2409 {
2410   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2411   PetscErrorCode ierr;
2412 
2413   PetscFunctionBegin;
2414   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2415   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2416   PetscFunctionReturn(0);
2417 }
2418 
2419 #if defined(PETSC_HAVE_PBGL)
2420 
2421 #include <boost/parallel/mpi/bsp_process_group.hpp>
2422 #include <boost/graph/distributed/ilu_default_graph.hpp>
2423 #include <boost/graph/distributed/ilu_0_block.hpp>
2424 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2425 #include <boost/graph/distributed/petsc/interface.hpp>
2426 #include <boost/multi_array.hpp>
2427 #include <boost/parallel/distributed_property_map.hpp>
2428 
2429 #undef __FUNCT__
2430 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2431 /*
2432   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2433 */
2434 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2435 {
2436   namespace petsc = boost::distributed::petsc;
2437 
2438   namespace graph_dist = boost::graph::distributed;
2439   using boost::graph::distributed::ilu_default::process_group_type;
2440   using boost::graph::ilu_permuted;
2441 
2442   PetscBool       row_identity, col_identity;
2443   PetscContainer  c;
2444   PetscInt        m, n, M, N;
2445   PetscErrorCode  ierr;
2446 
2447   PetscFunctionBegin;
2448   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2449   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2450   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2451   if (!row_identity || !col_identity) {
2452     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2453   }
2454 
2455   process_group_type pg;
2456   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2457   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2458   lgraph_type&   level_graph = *lgraph_p;
2459   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2460 
2461   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2462   ilu_permuted(level_graph);
2463 
2464   /* put together the new matrix */
2465   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2466   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2467   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2468   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2469   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2470   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2471   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2472   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2473 
2474   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2475   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2476   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2477   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 #undef __FUNCT__
2482 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2483 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2484 {
2485   PetscFunctionBegin;
2486   PetscFunctionReturn(0);
2487 }
2488 
2489 #undef __FUNCT__
2490 #define __FUNCT__ "MatSolve_MPIAIJ"
2491 /*
2492   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2493 */
2494 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2495 {
2496   namespace graph_dist = boost::graph::distributed;
2497 
2498   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2499   lgraph_type*   lgraph_p;
2500   PetscContainer c;
2501   PetscErrorCode ierr;
2502 
2503   PetscFunctionBegin;
2504   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2505   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2506   ierr = VecCopy(b, x);CHKERRQ(ierr);
2507 
2508   PetscScalar* array_x;
2509   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2510   PetscInt sx;
2511   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2512 
2513   PetscScalar* array_b;
2514   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2515   PetscInt sb;
2516   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2517 
2518   lgraph_type&   level_graph = *lgraph_p;
2519   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2520 
2521   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2522   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2523                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2524 
2525   typedef boost::iterator_property_map<array_ref_type::iterator,
2526                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2527   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2528                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2529 
2530   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2531 
2532   PetscFunctionReturn(0);
2533 }
2534 #endif
2535 
2536 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2537   PetscInt       nzlocal,nsends,nrecvs;
2538   PetscMPIInt    *send_rank,*recv_rank;
2539   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2540   PetscScalar    *sbuf_a,**rbuf_a;
2541   PetscErrorCode (*Destroy)(Mat);
2542 } Mat_Redundant;
2543 
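/*
   Usage sketch (not part of the original source): the reuse pattern the Mat_Redundant
   container above supports.  The names A, nsubcomm, subcomm and mlocal_sub are
   placeholders; subcomm is assumed to have been created already (e.g. with PetscSubcomm,
   as PCREDUNDANT does), and mlocal_sub is the local size of the redundant matrix.

     Mat C = PETSC_NULL;
     ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     // ... later, after A changes numerically but not structurally ...
     ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/
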
2544 #undef __FUNCT__
2545 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2546 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2547 {
2548   PetscErrorCode       ierr;
2549   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2550   PetscInt             i;
2551 
2552   PetscFunctionBegin;
2553   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2554   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2555   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2556   for (i=0; i<redund->nrecvs; i++){
2557     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2558     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2559   }
2560   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2561   ierr = PetscFree(redund);CHKERRQ(ierr);
2562   PetscFunctionReturn(0);
2563 }
2564 
2565 #undef __FUNCT__
2566 #define __FUNCT__ "MatDestroy_MatRedundant"
2567 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2568 {
2569   PetscErrorCode  ierr;
2570   PetscContainer  container;
2571   Mat_Redundant   *redund=PETSC_NULL;
2572 
2573   PetscFunctionBegin;
2574   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2575   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2576   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2577   A->ops->destroy = redund->Destroy;
2578   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2579   if (A->ops->destroy) {
2580     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2581   }
2582   PetscFunctionReturn(0);
2583 }
2584 
2585 #undef __FUNCT__
2586 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2587 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2588 {
2589   PetscMPIInt    rank,size;
2590   MPI_Comm       comm=((PetscObject)mat)->comm;
2591   PetscErrorCode ierr;
2592   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2593   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2594   PetscInt       *rowrange=mat->rmap->range;
2595   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2596   Mat            A=aij->A,B=aij->B,C=*matredundant;
2597   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2598   PetscScalar    *sbuf_a;
2599   PetscInt       nzlocal=a->nz+b->nz;
2600   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2601   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2602   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2603   MatScalar      *aworkA,*aworkB;
2604   PetscScalar    *vals;
2605   PetscMPIInt    tag1,tag2,tag3,imdex;
2606   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2607                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2608   MPI_Status     recv_status,*send_status;
2609   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2610   PetscInt       **rbuf_j=PETSC_NULL;
2611   PetscScalar    **rbuf_a=PETSC_NULL;
2612   Mat_Redundant  *redund=PETSC_NULL;
2613   PetscContainer container;
2614 
2615   PetscFunctionBegin;
2616   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2617   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2618 
2619   if (reuse == MAT_REUSE_MATRIX) {
2620     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2621     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2622     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2623     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2624     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2625     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2626     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2627     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2628 
2629     nsends    = redund->nsends;
2630     nrecvs    = redund->nrecvs;
2631     send_rank = redund->send_rank;
2632     recv_rank = redund->recv_rank;
2633     sbuf_nz   = redund->sbuf_nz;
2634     rbuf_nz   = redund->rbuf_nz;
2635     sbuf_j    = redund->sbuf_j;
2636     sbuf_a    = redund->sbuf_a;
2637     rbuf_j    = redund->rbuf_j;
2638     rbuf_a    = redund->rbuf_a;
2639   }
2640 
2641   if (reuse == MAT_INITIAL_MATRIX){
2642     PetscMPIInt  subrank,subsize;
2643     PetscInt     nleftover,np_subcomm;
2644     /* get the destination processors' id send_rank, nsends and nrecvs */
2645     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2646     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2647     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2648     np_subcomm = size/nsubcomm;
2649     nleftover  = size - nsubcomm*np_subcomm;
2650     nsends = 0; nrecvs = 0;
2651     for (i=0; i<size; i++){ /* i=rank*/
2652       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2653         send_rank[nsends] = i; nsends++;
2654         recv_rank[nrecvs++] = i;
2655       }
2656     }
2657     if (rank >= size - nleftover){/* this proc is a leftover processor */
2658       i = size-nleftover-1;
2659       j = 0;
2660       while (j < nsubcomm - nleftover){
2661         send_rank[nsends++] = i;
2662         i--; j++;
2663       }
2664     }
2665 
2666     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2667       for (i=0; i<nleftover; i++){
2668         recv_rank[nrecvs++] = size-nleftover+i;
2669       }
2670     }
2671 
2672     /* allocate sbuf_j, sbuf_a */
2673     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2674     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2675     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2676   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2677 
2678   /* copy mat's local entries into the buffers */
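  /* sbuf_j layout: (rend-rstart+1) row pointers followed by all column indices, row by row;
     within a row the indices are written in increasing global column order: off-diagonal
     entries with column < cstart, then the diagonal block, then off-diagonal entries with
     column >= cend.  sbuf_a holds the matching numerical values in the same order. */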
2679   if (reuse == MAT_INITIAL_MATRIX){
2680     rownz_max = 0;
2681     rptr = sbuf_j;
2682     cols = sbuf_j + rend-rstart + 1;
2683     vals = sbuf_a;
2684     rptr[0] = 0;
2685     for (i=0; i<rend-rstart; i++){
2686       row = i + rstart;
2687       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2688       ncols  = nzA + nzB;
2689       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2690       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2691       /* load the column indices for this row into cols */
2692       lwrite = 0;
2693       for (l=0; l<nzB; l++) {
2694         if ((ctmp = bmap[cworkB[l]]) < cstart){
2695           vals[lwrite]   = aworkB[l];
2696           cols[lwrite++] = ctmp;
2697         }
2698       }
2699       for (l=0; l<nzA; l++){
2700         vals[lwrite]   = aworkA[l];
2701         cols[lwrite++] = cstart + cworkA[l];
2702       }
2703       for (l=0; l<nzB; l++) {
2704         if ((ctmp = bmap[cworkB[l]]) >= cend){
2705           vals[lwrite]   = aworkB[l];
2706           cols[lwrite++] = ctmp;
2707         }
2708       }
2709       vals += ncols;
2710       cols += ncols;
2711       rptr[i+1] = rptr[i] + ncols;
2712       if (rownz_max < ncols) rownz_max = ncols;
2713     }
2714     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2715   } else { /* only copy matrix values into sbuf_a */
2716     rptr = sbuf_j;
2717     vals = sbuf_a;
2718     rptr[0] = 0;
2719     for (i=0; i<rend-rstart; i++){
2720       row = i + rstart;
2721       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2722       ncols  = nzA + nzB;
2723       cworkB = b->j + b->i[i];
2724       aworkA = a->a + a->i[i];
2725       aworkB = b->a + b->i[i];
2726       lwrite = 0;
2727       for (l=0; l<nzB; l++) {
2728         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2729       }
2730       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2731       for (l=0; l<nzB; l++) {
2732         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2733       }
2734       vals += ncols;
2735       rptr[i+1] = rptr[i] + ncols;
2736     }
2737   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2738 
2739   /* send nzlocal to others, and recv others' nzlocal */
2740   /*--------------------------------------------------*/
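  /* Three-stage protocol: tag1 exchanges the nonzero counts (nzlocal), tag2 exchanges the row
     pointers plus column indices (sbuf_j), and tag3 exchanges the numerical values (sbuf_a).
     Only the tag3 exchange is repeated when the matrix is reused with MAT_REUSE_MATRIX. */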
2741   if (reuse == MAT_INITIAL_MATRIX){
2742     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2743     s_waits2 = s_waits3 + nsends;
2744     s_waits1 = s_waits2 + nsends;
2745     r_waits1 = s_waits1 + nsends;
2746     r_waits2 = r_waits1 + nrecvs;
2747     r_waits3 = r_waits2 + nrecvs;
2748   } else {
2749     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2750     r_waits3 = s_waits3 + nsends;
2751   }
2752 
2753   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2754   if (reuse == MAT_INITIAL_MATRIX){
2755     /* get new tags to keep the communication clean */
2756     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2757     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2758     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2759 
2760     /* post receives of other's nzlocal */
2761     for (i=0; i<nrecvs; i++){
2762       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2763     }
2764     /* send nzlocal to others */
2765     for (i=0; i<nsends; i++){
2766       sbuf_nz[i] = nzlocal;
2767       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2768     }
2769     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2770     count = nrecvs;
2771     while (count) {
2772       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2773       recv_rank[imdex] = recv_status.MPI_SOURCE;
2774       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2775       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2776 
2777       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2778       rbuf_nz[imdex] += i + 2;
2779       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2780       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2781       count--;
2782     }
2783     /* wait on sends of nzlocal */
2784     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2785     /* send mat->i,j to others, and recv from others */
2786     /*------------------------------------------------*/
2787     for (i=0; i<nsends; i++){
2788       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2789       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2790     }
2791     /* wait on receives of mat->i,j */
2792     /*------------------------------*/
2793     count = nrecvs;
2794     while (count) {
2795       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2796       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2797       count--;
2798     }
2799     /* wait on sends of mat->i,j */
2800     /*---------------------------*/
2801     if (nsends) {
2802       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2803     }
2804   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2805 
2806   /* post receives, send and receive mat->a */
2807   /*----------------------------------------*/
2808   for (imdex=0; imdex<nrecvs; imdex++) {
2809     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2810   }
2811   for (i=0; i<nsends; i++){
2812     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2813   }
2814   count = nrecvs;
2815   while (count) {
2816     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2817     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2818     count--;
2819   }
2820   if (nsends) {
2821     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2822   }
2823 
2824   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2825 
2826   /* create redundant matrix */
2827   /*-------------------------*/
2828   if (reuse == MAT_INITIAL_MATRIX){
2829     /* compute rownz_max for preallocation */
2830     for (imdex=0; imdex<nrecvs; imdex++){
2831       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2832       rptr = rbuf_j[imdex];
2833       for (i=0; i<j; i++){
2834         ncols = rptr[i+1] - rptr[i];
2835         if (rownz_max < ncols) rownz_max = ncols;
2836       }
2837     }
2838 
2839     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2840     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2841     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2842     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2843     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2844     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2845   } else {
2846     C = *matredundant;
2847   }
2848 
2849   /* insert local matrix entries */
2850   rptr = sbuf_j;
2851   cols = sbuf_j + rend-rstart + 1;
2852   vals = sbuf_a;
2853   for (i=0; i<rend-rstart; i++){
2854     row   = i + rstart;
2855     ncols = rptr[i+1] - rptr[i];
2856     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2857     vals += ncols;
2858     cols += ncols;
2859   }
2860   /* insert received matrix entries */
2861   for (imdex=0; imdex<nrecvs; imdex++){
2862     rstart = rowrange[recv_rank[imdex]];
2863     rend   = rowrange[recv_rank[imdex]+1];
2864     rptr = rbuf_j[imdex];
2865     cols = rbuf_j[imdex] + rend-rstart + 1;
2866     vals = rbuf_a[imdex];
2867     for (i=0; i<rend-rstart; i++){
2868       row   = i + rstart;
2869       ncols = rptr[i+1] - rptr[i];
2870       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2871       vals += ncols;
2872       cols += ncols;
2873     }
2874   }
2875   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2876   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2877   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2878   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2879   if (reuse == MAT_INITIAL_MATRIX) {
2880     PetscContainer container;
2881     *matredundant = C;
2882     /* create a supporting struct and attach it to C for reuse */
2883     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2884     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2885     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2886     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2887     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2888     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2889 
2890     redund->nzlocal = nzlocal;
2891     redund->nsends  = nsends;
2892     redund->nrecvs  = nrecvs;
2893     redund->send_rank = send_rank;
2894     redund->recv_rank = recv_rank;
2895     redund->sbuf_nz = sbuf_nz;
2896     redund->rbuf_nz = rbuf_nz;
2897     redund->sbuf_j  = sbuf_j;
2898     redund->sbuf_a  = sbuf_a;
2899     redund->rbuf_j  = rbuf_j;
2900     redund->rbuf_a  = rbuf_a;
2901 
2902     redund->Destroy = C->ops->destroy;
2903     C->ops->destroy = MatDestroy_MatRedundant;
2904   }
2905   PetscFunctionReturn(0);
2906 }
2907 
2908 #undef __FUNCT__
2909 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2910 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2911 {
2912   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2913   PetscErrorCode ierr;
2914   PetscInt       i,*idxb = 0;
2915   PetscScalar    *va,*vb;
2916   Vec            vtmp;
2917 
2918   PetscFunctionBegin;
2919   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2920   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2921   if (idx) {
2922     for (i=0; i<A->rmap->n; i++) {
2923       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2924     }
2925   }
2926 
2927   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2928   if (idx) {
2929     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2930   }
2931   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2932   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2933 
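  /* merge: for each local row keep the entry of larger magnitude between the diagonal block
     (already in v) and the off-diagonal block (in vtmp), translating off-diagonal column
     indices back to global numbering through garray */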
2934   for (i=0; i<A->rmap->n; i++){
2935     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2936       va[i] = vb[i];
2937       if (idx) idx[i] = a->garray[idxb[i]];
2938     }
2939   }
2940 
2941   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2942   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2943   ierr = PetscFree(idxb);CHKERRQ(ierr);
2944   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2945   PetscFunctionReturn(0);
2946 }
2947 
2948 #undef __FUNCT__
2949 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2950 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2951 {
2952   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2953   PetscErrorCode ierr;
2954   PetscInt       i,*idxb = 0;
2955   PetscScalar    *va,*vb;
2956   Vec            vtmp;
2957 
2958   PetscFunctionBegin;
2959   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2960   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2961   if (idx) {
2962     for (i=0; i<A->rmap->n; i++) {
2963       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2964     }
2965   }
2966 
2967   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2968   if (idx) {
2969     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2970   }
2971   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2972   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2973 
2974   for (i=0; i<A->rmap->n; i++){
2975     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2976       va[i] = vb[i];
2977       if (idx) idx[i] = a->garray[idxb[i]];
2978     }
2979   }
2980 
2981   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2982   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2983   ierr = PetscFree(idxb);CHKERRQ(ierr);
2984   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2985   PetscFunctionReturn(0);
2986 }
2987 
2988 #undef __FUNCT__
2989 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2990 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2991 {
2992   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2993   PetscInt       n      = A->rmap->n;
2994   PetscInt       cstart = A->cmap->rstart;
2995   PetscInt      *cmap   = mat->garray;
2996   PetscInt      *diagIdx, *offdiagIdx;
2997   Vec            diagV, offdiagV;
2998   PetscScalar   *a, *diagA, *offdiagA;
2999   PetscInt       r;
3000   PetscErrorCode ierr;
3001 
3002   PetscFunctionBegin;
3003   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3004   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3005   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3006   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3007   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3008   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3009   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3010   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3011   for (r = 0; r < n; ++r) {
3012     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3013       a[r]   = diagA[r];
3014       idx[r] = cstart + diagIdx[r];
3015     } else {
3016       a[r]   = offdiagA[r];
3017       idx[r] = cmap[offdiagIdx[r]];
3018     }
3019   }
3020   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3021   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3022   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3023   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3024   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3025   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3026   PetscFunctionReturn(0);
3027 }
3028 
3029 #undef __FUNCT__
3030 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3031 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3032 {
3033   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
3034   PetscInt       n      = A->rmap->n;
3035   PetscInt       cstart = A->cmap->rstart;
3036   PetscInt      *cmap   = mat->garray;
3037   PetscInt      *diagIdx, *offdiagIdx;
3038   Vec            diagV, offdiagV;
3039   PetscScalar   *a, *diagA, *offdiagA;
3040   PetscInt       r;
3041   PetscErrorCode ierr;
3042 
3043   PetscFunctionBegin;
3044   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3045   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3046   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3047   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3048   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3049   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3050   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3051   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3052   for (r = 0; r < n; ++r) {
3053     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3054       a[r]   = diagA[r];
3055       idx[r] = cstart + diagIdx[r];
3056     } else {
3057       a[r]   = offdiagA[r];
3058       idx[r] = cmap[offdiagIdx[r]];
3059     }
3060   }
3061   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3062   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3063   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3064   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3065   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3066   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3067   PetscFunctionReturn(0);
3068 }
3069 
3070 #undef __FUNCT__
3071 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3072 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3073 {
3074   PetscErrorCode ierr;
3075   Mat            *dummy;
3076 
3077   PetscFunctionBegin;
3078   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3079   *newmat = *dummy;
3080   ierr = PetscFree(dummy);CHKERRQ(ierr);
3081   PetscFunctionReturn(0);
3082 }
3083 
3084 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3085 
3086 #undef __FUNCT__
3087 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3088 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3089 {
3090   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
3091   PetscErrorCode ierr;
3092 
3093   PetscFunctionBegin;
3094   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3095   PetscFunctionReturn(0);
3096 }
3097 
3098 #undef __FUNCT__
3099 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3100 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3101 {
3102   PetscErrorCode ierr;
3103   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3104 
3105   PetscFunctionBegin;
3106   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3107   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3108   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3109   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3110   PetscFunctionReturn(0);
3111 }
3112 
3113 /* -------------------------------------------------------------------*/
3114 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3115        MatGetRow_MPIAIJ,
3116        MatRestoreRow_MPIAIJ,
3117        MatMult_MPIAIJ,
3118 /* 4*/ MatMultAdd_MPIAIJ,
3119        MatMultTranspose_MPIAIJ,
3120        MatMultTransposeAdd_MPIAIJ,
3121 #ifdef PETSC_HAVE_PBGL
3122        MatSolve_MPIAIJ,
3123 #else
3124        0,
3125 #endif
3126        0,
3127        0,
3128 /*10*/ 0,
3129        0,
3130        0,
3131        MatSOR_MPIAIJ,
3132        MatTranspose_MPIAIJ,
3133 /*15*/ MatGetInfo_MPIAIJ,
3134        MatEqual_MPIAIJ,
3135        MatGetDiagonal_MPIAIJ,
3136        MatDiagonalScale_MPIAIJ,
3137        MatNorm_MPIAIJ,
3138 /*20*/ MatAssemblyBegin_MPIAIJ,
3139        MatAssemblyEnd_MPIAIJ,
3140        MatSetOption_MPIAIJ,
3141        MatZeroEntries_MPIAIJ,
3142 /*24*/ MatZeroRows_MPIAIJ,
3143        0,
3144 #ifdef PETSC_HAVE_PBGL
3145        0,
3146 #else
3147        0,
3148 #endif
3149        0,
3150        0,
3151 /*29*/ MatSetUp_MPIAIJ,
3152 #ifdef PETSC_HAVE_PBGL
3153        0,
3154 #else
3155        0,
3156 #endif
3157        0,
3158        0,
3159        0,
3160 /*34*/ MatDuplicate_MPIAIJ,
3161        0,
3162        0,
3163        0,
3164        0,
3165 /*39*/ MatAXPY_MPIAIJ,
3166        MatGetSubMatrices_MPIAIJ,
3167        MatIncreaseOverlap_MPIAIJ,
3168        MatGetValues_MPIAIJ,
3169        MatCopy_MPIAIJ,
3170 /*44*/ MatGetRowMax_MPIAIJ,
3171        MatScale_MPIAIJ,
3172        0,
3173        0,
3174        MatZeroRowsColumns_MPIAIJ,
3175 /*49*/ MatSetRandom_MPIAIJ,
3176        0,
3177        0,
3178        0,
3179        0,
3180 /*54*/ MatFDColoringCreate_MPIAIJ,
3181        0,
3182        MatSetUnfactored_MPIAIJ,
3183        MatPermute_MPIAIJ,
3184        0,
3185 /*59*/ MatGetSubMatrix_MPIAIJ,
3186        MatDestroy_MPIAIJ,
3187        MatView_MPIAIJ,
3188        0,
3189        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3190 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3191        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3192        0,
3193        0,
3194        0,
3195 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3196        MatGetRowMinAbs_MPIAIJ,
3197        0,
3198        MatSetColoring_MPIAIJ,
3199        0,
3200        MatSetValuesAdifor_MPIAIJ,
3201 /*75*/ MatFDColoringApply_AIJ,
3202        0,
3203        0,
3204        0,
3205        MatFindZeroDiagonals_MPIAIJ,
3206 /*80*/ 0,
3207        0,
3208        0,
3209 /*83*/ MatLoad_MPIAIJ,
3210        0,
3211        0,
3212        0,
3213        0,
3214        0,
3215 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3216        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3217        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3218        MatPtAP_MPIAIJ_MPIAIJ,
3219        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3220 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3221        0,
3222        0,
3223        0,
3224        0,
3225 /*99*/ 0,
3226        0,
3227        0,
3228        MatConjugate_MPIAIJ,
3229        0,
3230 /*104*/MatSetValuesRow_MPIAIJ,
3231        MatRealPart_MPIAIJ,
3232        MatImaginaryPart_MPIAIJ,
3233        0,
3234        0,
3235 /*109*/0,
3236        MatGetRedundantMatrix_MPIAIJ,
3237        MatGetRowMin_MPIAIJ,
3238        0,
3239        0,
3240 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3241        0,
3242        0,
3243        0,
3244        0,
3245 /*119*/0,
3246        0,
3247        0,
3248        0,
3249        MatGetMultiProcBlock_MPIAIJ,
3250 /*124*/MatFindNonzeroRows_MPIAIJ,
3251        MatGetColumnNorms_MPIAIJ,
3252        MatInvertBlockDiagonal_MPIAIJ,
3253        0,
3254        MatGetSubMatricesParallel_MPIAIJ,
3255 /*129*/0,
3256        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3257        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3258        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3259        0,
3260 /*134*/0,
3261        0,
3262        0,
3263        0,
3264        0
3265 };
3266 
3267 /* ----------------------------------------------------------------------------------------*/
3268 
3269 EXTERN_C_BEGIN
3270 #undef __FUNCT__
3271 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3272 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3273 {
3274   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3275   PetscErrorCode ierr;
3276 
3277   PetscFunctionBegin;
3278   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3279   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3280   PetscFunctionReturn(0);
3281 }
3282 EXTERN_C_END
3283 
3284 EXTERN_C_BEGIN
3285 #undef __FUNCT__
3286 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3287 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3288 {
3289   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3290   PetscErrorCode ierr;
3291 
3292   PetscFunctionBegin;
3293   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3294   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3295   PetscFunctionReturn(0);
3296 }
3297 EXTERN_C_END
3298 
3299 EXTERN_C_BEGIN
3300 #undef __FUNCT__
3301 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3302 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3303 {
3304   Mat_MPIAIJ     *b;
3305   PetscErrorCode ierr;
3306   PetscInt       i;
3307   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3308 
3309   PetscFunctionBegin;
3310   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3311   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3312   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3313   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3314   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3315   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3316 
3317   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3318   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3319   if (d_nnz) {
3320     for (i=0; i<B->rmap->n; i++) {
3321       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3322     }
3323   }
3324   if (o_nnz) {
3325     for (i=0; i<B->rmap->n; i++) {
3326       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3327     }
3328   }
3329   b = (Mat_MPIAIJ*)B->data;
3330 
3331   if (!B->preallocated) {
3332     /* Explicitly create 2 MATSEQAIJ matrices. */
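    /* b->A is the "diagonal" block (local rows by local columns); b->B is the "off-diagonal"
       block, created here with the full global column dimension and compressed to its nonzero
       columns (tracked in garray) during assembly */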
3333     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3334     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3335     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3336     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3337     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3338     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3339     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3340     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3341     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3342     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3343   }
3344 
3345   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3346   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3347   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3348   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3349   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3350   B->preallocated = PETSC_TRUE;
3351   PetscFunctionReturn(0);
3352 }
3353 EXTERN_C_END
3354 
3355 #undef __FUNCT__
3356 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3357 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3358 {
3359   Mat            mat;
3360   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3361   PetscErrorCode ierr;
3362 
3363   PetscFunctionBegin;
3364   *newmat       = 0;
3365   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3366   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3367   ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3368   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3369   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3370   a    = (Mat_MPIAIJ*)mat->data;
3371 
3372   mat->factortype    = matin->factortype;
3373   mat->rmap->bs      = matin->rmap->bs;
3374   mat->cmap->bs      = matin->cmap->bs;
3375   mat->assembled    = PETSC_TRUE;
3376   mat->insertmode   = NOT_SET_VALUES;
3377   mat->preallocated = PETSC_TRUE;
3378 
3379   a->size           = oldmat->size;
3380   a->rank           = oldmat->rank;
3381   a->donotstash     = oldmat->donotstash;
3382   a->roworiented    = oldmat->roworiented;
3383   a->rowindices     = 0;
3384   a->rowvalues      = 0;
3385   a->getrowactive   = PETSC_FALSE;
3386 
3387   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3388   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3389 
3390   if (oldmat->colmap) {
3391 #if defined (PETSC_USE_CTABLE)
3392     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3393 #else
3394     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3395     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3396     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3397 #endif
3398   } else a->colmap = 0;
3399   if (oldmat->garray) {
3400     PetscInt len;
3401     len  = oldmat->B->cmap->n;
3402     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3403     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3404     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3405   } else a->garray = 0;
3406 
3407   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3408   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3409   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3410   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3411   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3412   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3413   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3414   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3415   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3416   *newmat = mat;
3417   PetscFunctionReturn(0);
3418 }
3419 
3420 
3421 
3422 #undef __FUNCT__
3423 #define __FUNCT__ "MatLoad_MPIAIJ"
3424 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3425 {
3426   PetscScalar    *vals,*svals;
3427   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3428   PetscErrorCode ierr;
3429   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3430   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3431   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3432   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3433   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3434   int            fd;
3435   PetscInt       bs = 1;
3436 
3437   PetscFunctionBegin;
3438   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3439   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3440   if (!rank) {
3441     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3442     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3443     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3444   }
3445 
3446   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3447   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3448   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3449 
3450   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3451 
3452   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3453   M = header[1]; N = header[2];
3454   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3455   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3456   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3457 
3458   /* If global sizes are set, check if they are consistent with that given in the file */
3459   if (sizesset) {
3460     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3461   }
3462   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3463   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3464 
3465   /* determine ownership of all (block) rows */
3466   if ( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3467   if (newMat->rmap->n < 0 ) m    = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3468   else m = newMat->rmap->n; /* Set by user */
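  /* with PETSC_DECIDE each process gets (M/bs)/size blocks of bs rows, and the first
     (M/bs)%size processes get one extra block, so that all M rows are covered */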
3469 
3470   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3471   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3472 
3473   /* First process needs enough room for process with most rows */
3474   if (!rank) {
3475     mmax = rowners[1];
3476     for (i=2; i<=size; i++) {
3477       mmax = PetscMax(mmax, rowners[i]);
3478     }
3479   } else mmax = m;
3480 
3481   rowners[0] = 0;
3482   for (i=2; i<=size; i++) {
3483     rowners[i] += rowners[i-1];
3484   }
3485   rstart = rowners[rank];
3486   rend   = rowners[rank+1];
3487 
3488   /* distribute row lengths to all processors */
3489   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3490   if (!rank) {
3491     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3492     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3493     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3494     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3495     for (j=0; j<m; j++) {
3496       procsnz[0] += ourlens[j];
3497     }
3498     for (i=1; i<size; i++) {
3499       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3500       /* calculate the number of nonzeros on each processor */
3501       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3502         procsnz[i] += rowlengths[j];
3503       }
3504       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3505     }
3506     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3507   } else {
3508     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3509   }
3510 
3511   if (!rank) {
3512     /* determine max buffer needed and allocate it */
3513     maxnz = 0;
3514     for (i=0; i<size; i++) {
3515       maxnz = PetscMax(maxnz,procsnz[i]);
3516     }
3517     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3518 
3519     /* read in my part of the matrix column indices  */
3520     nz   = procsnz[0];
3521     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3522     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3523 
3524     /* read in everyone else's and ship them off */
3525     for (i=1; i<size; i++) {
3526       nz     = procsnz[i];
3527       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3528       ierr   = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3529     }
3530     ierr = PetscFree(cols);CHKERRQ(ierr);
3531   } else {
3532     /* determine buffer space needed for message */
3533     nz = 0;
3534     for (i=0; i<m; i++) {
3535       nz += ourlens[i];
3536     }
3537     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3538 
3539     /* receive message of column indices*/
3540     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3541   }
3542 
3543   /* determine column ownership if matrix is not square */
3544   if (N != M) {
3545     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3546     else n = newMat->cmap->n;
3547     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3548     cstart = cend - n;
3549   } else {
3550     cstart = rstart;
3551     cend   = rend;
3552     n      = cend - cstart;
3553   }
3554 
3555   /* loop over local rows, determining number of off diagonal entries */
3556   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3557   jj = 0;
3558   for (i=0; i<m; i++) {
3559     for (j=0; j<ourlens[i]; j++) {
3560       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3561       jj++;
3562     }
3563   }
3564 
3565   for (i=0; i<m; i++) {
3566     ourlens[i] -= offlens[i];
3567   }
3568   if (!sizesset) {
3569     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3570   }
3571 
3572   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3573 
3574   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3575 
3576   for (i=0; i<m; i++) {
3577     ourlens[i] += offlens[i];
3578   }
3579 
3580   if (!rank) {
3581     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3582 
3583     /* read in my part of the matrix numerical values  */
3584     nz   = procsnz[0];
3585     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3586 
3587     /* insert into matrix */
3588     jj      = rstart;
3589     smycols = mycols;
3590     svals   = vals;
3591     for (i=0; i<m; i++) {
3592       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3593       smycols += ourlens[i];
3594       svals   += ourlens[i];
3595       jj++;
3596     }
3597 
3598     /* read in other processors and ship out */
3599     for (i=1; i<size; i++) {
3600       nz     = procsnz[i];
3601       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3602       ierr   = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3603     }
3604     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3605   } else {
3606     /* receive numeric values */
3607     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3608 
3609     /* receive message of values*/
3610     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3611 
3612     /* insert into matrix */
3613     jj      = rstart;
3614     smycols = mycols;
3615     svals   = vals;
3616     for (i=0; i<m; i++) {
3617       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3618       smycols += ourlens[i];
3619       svals   += ourlens[i];
3620       jj++;
3621     }
3622   }
3623   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3624   ierr = PetscFree(vals);CHKERRQ(ierr);
3625   ierr = PetscFree(mycols);CHKERRQ(ierr);
3626   ierr = PetscFree(rowners);CHKERRQ(ierr);
3627   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3628   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3629 
3630   PetscFunctionReturn(0);
3631 }
3632 
3633 #undef __FUNCT__
3634 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3635 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3636 {
3637   PetscErrorCode ierr;
3638   IS             iscol_local;
3639   PetscInt       csize;
3640 
3641   PetscFunctionBegin;
3642   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
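  /* every process needs the complete set of requested columns as a sequential IS, so iscol is
     allgathered on the first call and cached on the new matrix under the name "ISAllGather"
     so that a MAT_REUSE_MATRIX call can find it again */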
3643   if (call == MAT_REUSE_MATRIX) {
3644     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3645     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3646   } else {
3647     PetscInt cbs;
3648     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3649     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3650     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3651   }
3652   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3653   if (call == MAT_INITIAL_MATRIX) {
3654     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3655     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3656   }
3657   PetscFunctionReturn(0);
3658 }
3659 
3660 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3661 #undef __FUNCT__
3662 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3663 /*
3664     Not great since it makes two copies of the submatrix: first a sequential AIJ
3665   matrix on each process, and then the final result obtained by concatenating those
3666   local matrices.  Writing it directly would look much like MatGetSubMatrices_MPIAIJ().
3667 
3668   Note: This requires a sequential iscol containing all of the requested column indices.
3669 */
3670 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3671 {
3672   PetscErrorCode ierr;
3673   PetscMPIInt    rank,size;
3674   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3675   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3676   PetscBool      allcolumns, colflag;
3677   Mat            M,Mreuse;
3678   MatScalar      *vwork,*aa;
3679   MPI_Comm       comm = ((PetscObject)mat)->comm;
3680   Mat_SeqAIJ     *aij;
3681 
3682 
3683   PetscFunctionBegin;
3684   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3685   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3686 
3687   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3688   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3689   if (colflag && ncol == mat->cmap->N){
3690     allcolumns = PETSC_TRUE;
3691   } else {
3692     allcolumns = PETSC_FALSE;
3693   }
3694   if (call ==  MAT_REUSE_MATRIX) {
3695     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3696     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3697     ierr  = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3698   } else {
3699     ierr   = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3700   }
3701 
3702   /*
3703       m - number of local rows
3704       n - number of columns (same on all processors)
3705       rstart - first row in new global matrix generated
3706   */
3707   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3708   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3709   if (call == MAT_INITIAL_MATRIX) {
3710     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3711     ii  = aij->i;
3712     jj  = aij->j;
3713 
3714     /*
3715         Determine the number of non-zeros in the diagonal and off-diagonal
3716         portions of the matrix in order to do correct preallocation
3717     */
3718 
3719     /* first get start and end of "diagonal" columns */
3720     if (csize == PETSC_DECIDE) {
3721       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3722       if (mglobal == n) { /* square matrix */
3723 	nlocal = m;
3724       } else {
3725         nlocal = n/size + ((n % size) > rank);
3726       }
3727     } else {
3728       nlocal = csize;
3729     }
3730     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3731     rstart = rend - nlocal;
3732     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3733 
3734     /* next, compute all the lengths */
3735     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3736     olens = dlens + m;
3737     for (i=0; i<m; i++) {
3738       jend = ii[i+1] - ii[i];
3739       olen = 0;
3740       dlen = 0;
3741       for (j=0; j<jend; j++) {
3742         if (*jj < rstart || *jj >= rend) olen++;
3743         else dlen++;
3744         jj++;
3745       }
3746       olens[i] = olen;
3747       dlens[i] = dlen;
3748     }
3749     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3750     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3751     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3752     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3753     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3754     ierr = PetscFree(dlens);CHKERRQ(ierr);
3755   } else {
3756     PetscInt ml,nl;
3757 
3758     M = *newmat;
3759     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3760     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3761     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3762     /*
3763          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3764        rather than the slower MatSetValues().
3765     */
3766     M->was_assembled = PETSC_TRUE;
3767     M->assembled     = PETSC_FALSE;
3768   }
3769   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3770   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3771   ii  = aij->i;
3772   jj  = aij->j;
3773   aa  = aij->a;
3774   for (i=0; i<m; i++) {
3775     row   = rstart + i;
3776     nz    = ii[i+1] - ii[i];
3777     cwork = jj;     jj += nz;
3778     vwork = aa;     aa += nz;
3779     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3780   }
3781 
3782   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3783   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3784   *newmat = M;
3785 
3786   /* save submatrix used in processor for next request */
3787   if (call ==  MAT_INITIAL_MATRIX) {
3788     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3789     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3790   }
3791 
3792   PetscFunctionReturn(0);
3793 }
3794 
3795 EXTERN_C_BEGIN
3796 #undef __FUNCT__
3797 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3798 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3799 {
3800   PetscInt       m,cstart, cend,j,nnz,i,d;
3801   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3802   const PetscInt *JJ;
3803   PetscScalar    *values;
3804   PetscErrorCode ierr;
3805 
3806   PetscFunctionBegin;
3807   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3808 
3809   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3810   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3811   m      = B->rmap->n;
3812   cstart = B->cmap->rstart;
3813   cend   = B->cmap->rend;
3814   rstart = B->rmap->rstart;
3815 
3816   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3817 
3818 #if defined(PETSC_USE_DEBUG)
3819   for (i=0; i<m; i++) {
3820     nnz     = Ii[i+1]- Ii[i];
3821     JJ      = J + Ii[i];
3822     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3823     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3824     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3825   }
3826 #endif
3827 
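  /* count, for each local row, how many of its column indices fall in the diagonal block
     [cstart,cend) versus the off-diagonal block; these counts drive the preallocation below */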
3828   for (i=0; i<m; i++) {
3829     nnz     = Ii[i+1]- Ii[i];
3830     JJ      = J + Ii[i];
3831     nnz_max = PetscMax(nnz_max,nnz);
3832     d       = 0;
3833     for (j=0; j<nnz; j++) {
3834       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3835     }
3836     d_nnz[i] = d;
3837     o_nnz[i] = nnz - d;
3838   }
3839   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3840   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3841 
3842   if (v) values = (PetscScalar*)v;
3843   else {
3844     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3845     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3846   }
3847 
3848   for (i=0; i<m; i++) {
3849     ii   = i + rstart;
3850     nnz  = Ii[i+1]- Ii[i];
3851     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3852   }
3853   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3854   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3855 
3856   if (!v) {
3857     ierr = PetscFree(values);CHKERRQ(ierr);
3858   }
3859   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3860   PetscFunctionReturn(0);
3861 }
3862 EXTERN_C_END
3863 
3864 #undef __FUNCT__
3865 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3866 /*@
3867    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3868    (the default parallel PETSc format).
3869 
3870    Collective on MPI_Comm
3871 
3872    Input Parameters:
3873 +  B - the matrix
3874 .  i - the indices into j for the start of each local row (starts with zero)
3875 .  j - the column indices for each local row (starts with zero)
3876 -  v - optional values in the matrix
3877 
3878    Level: developer
3879 
3880    Notes:
3881        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3882      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3883      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3884 
3885        The i and j indices are 0 based, and the i indices are offsets into the local j (and v) arrays.
3886 
3887        The format used for the sparse matrix input is equivalent to a
3888     row-major ordering, i.e. for the following matrix the expected input data is
3889     as shown:
3890 
3891         1 0 0
3892         2 0 3     P0
3893        -------
3894         4 5 6     P1
3895 
3896      Process0 [P0]: rows_owned=[0,1]
3897         i =  {0,1,3}  [size = nrow+1  = 2+1]
3898         j =  {0,0,2}  [size = nz = 3]
3899         v =  {1,2,3}  [size = nz = 3]
3900 
3901      Process1 [P1]: rows_owned=[2]
3902         i =  {0,3}    [size = nrow+1  = 1+1]
3903         j =  {0,1,2}  [size = nz = 3]
3904         v =  {4,5,6}  [size = nz = 3]
3905 
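   A minimal sketch of the corresponding call on Process0 above (the arrays here are simply the
   hypothetical local CSR data from the example; v may be passed as PETSC_NULL to set only the
   nonzero pattern):

.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve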
3906 .keywords: matrix, aij, compressed row, sparse, parallel
3907 
3908 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3909           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3910 @*/
3911 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3912 {
3913   PetscErrorCode ierr;
3914 
3915   PetscFunctionBegin;
3916   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3917   PetscFunctionReturn(0);
3918 }
3919 
3920 #undef __FUNCT__
3921 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3922 /*@C
3923    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3924    (the default parallel PETSc format).  For good matrix assembly performance
3925    the user should preallocate the matrix storage by setting the parameters
3926    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3927    performance can be increased by more than a factor of 50.
3928 
3929    Collective on MPI_Comm
3930 
3931    Input Parameters:
3932 +  A - the matrix
3933 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3934            (same value is used for all local rows)
3935 .  d_nnz - array containing the number of nonzeros in the various rows of the
3936            DIAGONAL portion of the local submatrix (possibly different for each row)
3937            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3938            The size of this array is equal to the number of local rows, i.e. 'm'.
3939            For matrices that will be factored, you must leave room for (and set)
3940            the diagonal entry even if it is zero.
3941 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3942            submatrix (same value is used for all local rows).
3943 -  o_nnz - array containing the number of nonzeros in the various rows of the
3944            OFF-DIAGONAL portion of the local submatrix (possibly different for
3945            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3946            structure. The size of this array is equal to the number
3947            of local rows, i.e. 'm'.
3948 
3949    If the *_nnz parameter is given then the *_nz parameter is ignored
3950 
3951    The AIJ format (also called the Yale sparse matrix format or
3952    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3953    storage.  The stored row and column indices begin with zero.
3954    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3955 
3956    The parallel matrix is partitioned such that the first m0 rows belong to
3957    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3958    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
3959 
3960    The DIAGONAL portion of the local submatrix of a processor can be defined
3961    as the submatrix obtained by extracting the part corresponding to
3962    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3963    first row that belongs to the processor, r2 is the last row belonging to
3964    this processor, and c1-c2 is the range of indices of the local part of a
3965    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3966    common case of a square matrix, the row and column ranges are the same and
3967    the DIAGONAL part is also square. The remaining portion of the local
3968    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3969 
3970    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3971 
3972    You can call MatGetInfo() to get information on how effective the preallocation was;
3973    for example, see the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3974    You can also run with the option -info and look for messages with the string
3975    malloc in them to see if additional memory allocation was needed.
3976 
3977    Example usage:
3978 
3979    Consider the following 8x8 matrix with 34 non-zero values, that is
3980    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3981    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3982    as follows:
3983 
3984 .vb
3985             1  2  0  |  0  3  0  |  0  4
3986     Proc0   0  5  6  |  7  0  0  |  8  0
3987             9  0 10  | 11  0  0  | 12  0
3988     -------------------------------------
3989            13  0 14  | 15 16 17  |  0  0
3990     Proc1   0 18  0  | 19 20 21  |  0  0
3991             0  0  0  | 22 23  0  | 24  0
3992     -------------------------------------
3993     Proc2  25 26 27  |  0  0 28  | 29  0
3994            30  0  0  | 31 32 33  |  0 34
3995 .ve
3996 
3997    This can be represented as a collection of submatrices as:
3998 
3999 .vb
4000       A B C
4001       D E F
4002       G H I
4003 .ve
4004 
4005    Here the submatrices A,B,C are owned by proc0, D,E,F are
4006    owned by proc1, and G,H,I are owned by proc2.
4007 
4008    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4009    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4010    The 'M','N' parameters are 8,8, and have the same values on all procs.
4011 
4012    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4013    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4014    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4015    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4016    part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
4017    matrix and [DF] as another SeqAIJ matrix.
4018 
4019    When the d_nz, o_nz parameters are specified, d_nz storage elements are
4020    allocated for every row of the local DIAGONAL submatrix, and o_nz
4021    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4022    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4023    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4024    In this case, the values of d_nz,o_nz are:
4025 .vb
4026      proc0 : d_nz = 2, o_nz = 2
4027      proc1 : d_nz = 3, o_nz = 2
4028      proc2 : d_nz = 1, o_nz = 4
4029 .ve
4030    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4031    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4032    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4033    34 values.
4034 
4035    When the d_nnz, o_nnz parameters are specified, the storage is specified
4036    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4037    In the above case the values for d_nnz,o_nnz are:
4038 .vb
4039      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4040      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4041      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4042 .ve
4043    Here the space allocated is the sum of all the above values, i.e., 34, and
4044    hence the preallocation is exact.
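
   As a concrete sketch (illustrative only; each process passes its own arrays, and
   error checking is abbreviated), the calls on proc1 of the example above could be
.vb
     Mat      A;
     PetscInt d_nnz[3] = {3,3,2};   /* nonzeros per row of the DIAGONAL block [E] */
     PetscInt o_nnz[3] = {2,1,1};   /* nonzeros per row of the OFF-DIAGONAL block [DF] */

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve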
4045 
4046    Level: intermediate
4047 
4048 .keywords: matrix, aij, compressed row, sparse, parallel
4049 
4050 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4051           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4052 @*/
4053 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4054 {
4055   PetscErrorCode ierr;
4056 
4057   PetscFunctionBegin;
4058   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4059   PetscValidType(B,1);
4060   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4061   PetscFunctionReturn(0);
4062 }
4063 
4064 #undef __FUNCT__
4065 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4066 /*@
4067      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
4068          rows in standard CSR format.
4069 
4070    Collective on MPI_Comm
4071 
4072    Input Parameters:
4073 +  comm - MPI communicator
4074 .  m - number of local rows (Cannot be PETSC_DECIDE)
4075 .  n - This value should be the same as the local size used in creating the
4076        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4077        calculated if N is given) For square matrices n is almost always m.
4078 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4079 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4080 .   i - row indices
4081 .   j - column indices
4082 -   a - matrix values
4083 
4084    Output Parameter:
4085 .   mat - the matrix
4086 
4087    Level: intermediate
4088 
4089    Notes:
4090        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4091      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4092      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4093 
4094        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4095        The i and j indices are 0-based, and the i indices are offsets into the local j (and a) arrays.
4096        The format which is used for the sparse matrix input, is equivalent to a
4097        The format which is used for the sparse matrix input is equivalent to a
4098     row-major ordering, i.e., for the following matrix, the input data expected is
4099     as shown:
4100         1 0 0
4101         2 0 3     P0
4102        -------
4103         4 5 6     P1
4104 
4105      Process0 [P0]: rows_owned=[0,1]
4106         i =  {0,1,3}  [size = nrow+1  = 2+1]
4107         j =  {0,0,2}  [size = nz = 3]
4108         v =  {1,2,3}  [size = nz = 3]
4109 
4110      Process1 [P1]: rows_owned=[2]
4111         i =  {0,3}    [size = nrow+1  = 1+1]
4112         j =  {0,1,2}  [size = nz = 3]
4113         v =  {4,5,6}  [size = nz = 3]
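
     As an illustrative sketch (variable names and the communicator are placeholders;
     error checking is abbreviated), process 0 above could create the matrix with
.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     Mat         A;

     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);CHKERRQ(ierr);
.ve
     with process 1 making the corresponding call for its single row.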
4114 
4115 .keywords: matrix, aij, compressed row, sparse, parallel
4116 
4117 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4118           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4119 @*/
4120 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4121 {
4122   PetscErrorCode ierr;
4123 
4124   PetscFunctionBegin;
4125   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4126   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4127   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4128   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4129   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4130   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4131   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4132   PetscFunctionReturn(0);
4133 }
4134 
4135 #undef __FUNCT__
4136 #define __FUNCT__ "MatCreateAIJ"
4137 /*@C
4138    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4139    (the default parallel PETSc format).  For good matrix assembly performance
4140    the user should preallocate the matrix storage by setting the parameters
4141    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4142    performance can be increased by more than a factor of 50.
4143 
4144    Collective on MPI_Comm
4145 
4146    Input Parameters:
4147 +  comm - MPI communicator
4148 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4149            This value should be the same as the local size used in creating the
4150            y vector for the matrix-vector product y = Ax.
4151 .  n - This value should be the same as the local size used in creating the
4152        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4153        calculated if N is given) For square matrices n is almost always m.
4154 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4155 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4156 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4157            (same value is used for all local rows)
4158 .  d_nnz - array containing the number of nonzeros in the various rows of the
4159            DIAGONAL portion of the local submatrix (possibly different for each row)
4160            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4161            The size of this array is equal to the number of local rows, i.e., 'm'.
4162 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4163            submatrix (same value is used for all local rows).
4164 -  o_nnz - array containing the number of nonzeros in the various rows of the
4165            OFF-DIAGONAL portion of the local submatrix (possibly different for
4166            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4167            structure. The size of this array is equal to the number
4168            of local rows, i.e., 'm'.
4169 
4170    Output Parameter:
4171 .  A - the matrix
4172 
4173    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4174    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4175    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4176 
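   A minimal sketch of that paradigm (sizes and preallocation values are placeholders):
.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,d_nz,d_nnz);CHKERRQ(ierr);            /* used only if A ends up SeqAIJ */
     ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); /* used only if A ends up MPIAIJ */
.ve
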
4177    Notes:
4178    If the *_nnz parameter is given then the *_nz parameter is ignored
4179 
4180    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4181    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4182    storage requirements for this matrix.
4183 
4184    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4185    processor then it must be used on all processors that share the object for
4186    that argument.
4187 
4188    The user MUST specify either the local or global matrix dimensions
4189    (possibly both).
4190 
4191    The parallel matrix is partitioned across processors such that the
4192    first m0 rows belong to process 0, the next m1 rows belong to
4193    process 1, the next m2 rows belong to process 2, etc., where
4194    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4195    values corresponding to an [m x N] submatrix.
4196 
4197    The columns are logically partitioned with the n0 columns belonging
4198    to the 0th partition, the next n1 columns belonging to the next
4199    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4200 
4201    The DIAGONAL portion of the local submatrix on any given processor
4202    is the submatrix formed by the m rows and the n columns owned by
4203    that processor, i.e., the diagonal submatrix on
4204    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4205    etc. The remaining portion of the local submatrix, [m x (N-n)],
4206    constitutes the OFF-DIAGONAL portion. The example below
4207    illustrates this concept.
4208 
4209    For a square global matrix we define each processor's diagonal portion
4210    to be its local rows and the corresponding columns (a square submatrix);
4211    each processor's off-diagonal portion encompasses the remainder of the
4212    local matrix (a rectangular submatrix).
4213 
4214    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4215 
4216    When calling this routine with a single process communicator, a matrix of
4217    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4218    type of communicator, use the construction mechanism:
4219      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4220 
4221    By default, this format uses inodes (identical nodes) when possible.
4222    We search for consecutive rows with the same nonzero structure, thereby
4223    reusing matrix information to achieve increased efficiency.
4224 
4225    Options Database Keys:
4226 +  -mat_no_inode  - Do not use inodes
4227 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4228 -  -mat_aij_oneindex - Internally use indexing starting at 1
4229         rather than 0.  Note that when calling MatSetValues(),
4230         the user still MUST index entries starting at 0!
4231 
4232 
4233    Example usage:
4234 
4235    Consider the following 8x8 matrix with 34 non-zero values, that is
4236    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4237    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4238    as follows:
4239 
4240 .vb
4241             1  2  0  |  0  3  0  |  0  4
4242     Proc0   0  5  6  |  7  0  0  |  8  0
4243             9  0 10  | 11  0  0  | 12  0
4244     -------------------------------------
4245            13  0 14  | 15 16 17  |  0  0
4246     Proc1   0 18  0  | 19 20 21  |  0  0
4247             0  0  0  | 22 23  0  | 24  0
4248     -------------------------------------
4249     Proc2  25 26 27  |  0  0 28  | 29  0
4250            30  0  0  | 31 32 33  |  0 34
4251 .ve
4252 
4253    This can be represented as a collection of submatrices as:
4254 
4255 .vb
4256       A B C
4257       D E F
4258       G H I
4259 .ve
4260 
4261    Here the submatrices A,B,C are owned by proc0, D,E,F are
4262    owned by proc1, and G,H,I are owned by proc2.
4263 
4264    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4265    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4266    The 'M','N' parameters are 8,8, and have the same values on all procs.
4267 
4268    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4269    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4270    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4271    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4272    part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
4273    matrix and [DF] as another SeqAIJ matrix.
4274 
4275    When the d_nz, o_nz parameters are specified, d_nz storage elements are
4276    allocated for every row of the local DIAGONAL submatrix, and o_nz
4277    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4278    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4279    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4280    In this case, the values of d_nz,o_nz are:
4281 .vb
4282      proc0 : d_nz = 2, o_nz = 2
4283      proc1 : d_nz = 3, o_nz = 2
4284      proc2 : d_nz = 1, o_nz = 4
4285 .ve
4286    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4287    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4288    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4289    34 values.
4290 
4291    When the d_nnz, o_nnz parameters are specified, the storage is specified
4292    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4293    In the above case the values for d_nnz,o_nnz are:
4294 .vb
4295      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4296      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4297      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4298 .ve
4299    Here the space allocated is the sum of all the above values, i.e., 34, and
4300    hence the preallocation is exact.
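
   For instance (an illustrative sketch, with error checking abbreviated), proc1 of the
   example above could create the matrix in one call as
.vb
     PetscInt d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};
     Mat      A;

     ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve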
4301 
4302    Level: intermediate
4303 
4304 .keywords: matrix, aij, compressed row, sparse, parallel
4305 
4306 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4307           MPIAIJ, MatCreateMPIAIJWithArrays()
4308 @*/
4309 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4310 {
4311   PetscErrorCode ierr;
4312   PetscMPIInt    size;
4313 
4314   PetscFunctionBegin;
4315   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4316   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4317   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4318   if (size > 1) {
4319     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4320     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4321   } else {
4322     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4323     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4324   }
4325   PetscFunctionReturn(0);
4326 }
4327 
4328 #undef __FUNCT__
4329 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4330 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4331 {
4332   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4333 
4334   PetscFunctionBegin;
4335   *Ad     = a->A;
4336   *Ao     = a->B;
4337   *colmap = a->garray;
4338   PetscFunctionReturn(0);
4339 }
4340 
4341 #undef __FUNCT__
4342 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4343 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4344 {
4345   PetscErrorCode ierr;
4346   PetscInt       i;
4347   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4348 
4349   PetscFunctionBegin;
4350   if (coloring->ctype == IS_COLORING_GLOBAL) {
4351     ISColoringValue *allcolors,*colors;
4352     ISColoring      ocoloring;
4353 
4354     /* set coloring for diagonal portion */
4355     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4356 
4357     /* set coloring for off-diagonal portion */
4358     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4359     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4360     for (i=0; i<a->B->cmap->n; i++) {
4361       colors[i] = allcolors[a->garray[i]];
4362     }
4363     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4364     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4365     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4366     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4367   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4368     ISColoringValue *colors;
4369     PetscInt        *larray;
4370     ISColoring      ocoloring;
4371 
4372     /* set coloring for diagonal portion */
4373     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4374     for (i=0; i<a->A->cmap->n; i++) {
4375       larray[i] = i + A->cmap->rstart;
4376     }
4377     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4378     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4379     for (i=0; i<a->A->cmap->n; i++) {
4380       colors[i] = coloring->colors[larray[i]];
4381     }
4382     ierr = PetscFree(larray);CHKERRQ(ierr);
4383     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4384     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4385     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4386 
4387     /* set coloring for off-diagonal portion */
4388     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4389     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4390     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4391     for (i=0; i<a->B->cmap->n; i++) {
4392       colors[i] = coloring->colors[larray[i]];
4393     }
4394     ierr = PetscFree(larray);CHKERRQ(ierr);
4395     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4396     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4397     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4398   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4399 
4400   PetscFunctionReturn(0);
4401 }
4402 
4403 #undef __FUNCT__
4404 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4405 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4406 {
4407   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4408   PetscErrorCode ierr;
4409 
4410   PetscFunctionBegin;
4411   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4412   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4413   PetscFunctionReturn(0);
4414 }
4415 
4416 #undef __FUNCT__
4417 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4418 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4419 {
4420   PetscErrorCode ierr;
4421   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4422   PetscInt       *indx;
4423 
4424   PetscFunctionBegin;
4425   /* This routine will ONLY return MPIAIJ type matrix */
4426   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4427   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4428   if (n == PETSC_DECIDE){
4429     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4430   }
4431   /* Check sum(n) = N */
4432   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4433   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4434 
4435   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4436   rstart -= m;
4437 
4438   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4439   for (i=0;i<m;i++) {
4440     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4441     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4442     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4443   }
4444 
4445   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4446   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4447   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4448   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4449   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4450   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4451   PetscFunctionReturn(0);
4452 }
4453 
4454 #undef __FUNCT__
4455 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4456 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4457 {
4458   PetscErrorCode ierr;
4459   PetscInt       m,N,i,rstart,nnz,Ii;
4460   PetscInt       *indx;
4461   PetscScalar    *values;
4462 
4463   PetscFunctionBegin;
4464   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4465   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4466   for (i=0;i<m;i++) {
4467     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4468     Ii    = i + rstart;
4469     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4470     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4471   }
4472   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4473   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4474   PetscFunctionReturn(0);
4475 }
4476 
4477 #undef __FUNCT__
4478 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4479 /*@
4480       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4481                  matrices from each processor
4482 
4483     Collective on MPI_Comm
4484 
4485    Input Parameters:
4486 +    comm - the communicator the parallel matrix will live on
4487 .    inmat - the input sequential matrix (each process passes its own)
4488 .    n - number of local columns (or PETSC_DECIDE)
4489 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4490 
4491    Output Parameter:
4492 .    outmat - the parallel matrix generated
4493 
4494     Level: advanced
4495 
4496    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4497 
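   A minimal usage sketch (illustrative; seqA is a SeqAIJ matrix already assembled on each process):
.vb
     Mat outA;

     ierr = MatCreateMPIAIJConcatenateSeqAIJ(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outA);CHKERRQ(ierr);
.ve
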
4498 @*/
4499 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4500 {
4501   PetscErrorCode ierr;
4502 
4503   PetscFunctionBegin;
4504   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4505   if (scall == MAT_INITIAL_MATRIX){
4506     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4507   }
4508   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4509   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4510   PetscFunctionReturn(0);
4511 }
4512 
4513 #undef __FUNCT__
4514 #define __FUNCT__ "MatFileSplit"
4515 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4516 {
4517   PetscErrorCode    ierr;
4518   PetscMPIInt       rank;
4519   PetscInt          m,N,i,rstart,nnz;
4520   size_t            len;
4521   const PetscInt    *indx;
4522   PetscViewer       out;
4523   char              *name;
4524   Mat               B;
4525   const PetscScalar *values;
4526 
4527   PetscFunctionBegin;
4528   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4529   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4530   /* Should this be the type of the diagonal block of A? */
4531   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4532   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4533   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4534   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4535   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4536   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4537   for (i=0;i<m;i++) {
4538     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4539     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4540     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4541   }
4542   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4543   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4544 
4545   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4546   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4547   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4548   sprintf(name,"%s.%d",outfile,rank);
4549   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4550   ierr = PetscFree(name);CHKERRQ(ierr);
4551   ierr = MatView(B,out);CHKERRQ(ierr);
4552   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4553   ierr = MatDestroy(&B);CHKERRQ(ierr);
4554   PetscFunctionReturn(0);
4555 }
4556 
4557 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4558 #undef __FUNCT__
4559 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4560 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4561 {
4562   PetscErrorCode       ierr;
4563   Mat_Merge_SeqsToMPI  *merge;
4564   PetscContainer       container;
4565 
4566   PetscFunctionBegin;
4567   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4568   if (container) {
4569     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4570     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4571     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4572     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4573     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4574     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4575     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4576     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4577     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4578     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4579     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4580     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4581     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4582     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4583     ierr = PetscFree(merge);CHKERRQ(ierr);
4584     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4585   }
4586   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4587   PetscFunctionReturn(0);
4588 }
4589 
4590 #include <../src/mat/utils/freespace.h>
4591 #include <petscbt.h>
4592 
4593 #undef __FUNCT__
4594 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4595 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4596 {
4597   PetscErrorCode       ierr;
4598   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4599   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4600   PetscMPIInt          size,rank,taga,*len_s;
4601   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4602   PetscInt             proc,m;
4603   PetscInt             **buf_ri,**buf_rj;
4604   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4605   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4606   MPI_Request          *s_waits,*r_waits;
4607   MPI_Status           *status;
4608   MatScalar            *aa=a->a;
4609   MatScalar            **abuf_r,*ba_i;
4610   Mat_Merge_SeqsToMPI  *merge;
4611   PetscContainer       container;
4612 
4613   PetscFunctionBegin;
4614   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4615 
4616   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4617   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4618 
4619   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4620   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4621 
4622   bi     = merge->bi;
4623   bj     = merge->bj;
4624   buf_ri = merge->buf_ri;
4625   buf_rj = merge->buf_rj;
4626 
4627   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4628   owners = merge->rowmap->range;
4629   len_s  = merge->len_s;
4630 
4631   /* send and recv matrix values */
4632   /*-----------------------------*/
4633   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4634   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4635 
4636   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4637   for (proc=0,k=0; proc<size; proc++){
4638     if (!len_s[proc]) continue;
4639     i = owners[proc];
4640     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4641     k++;
4642   }
4643 
4644   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4645   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4646   ierr = PetscFree(status);CHKERRQ(ierr);
4647 
4648   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4649   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4650 
4651   /* insert mat values of mpimat */
4652   /*----------------------------*/
4653   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4654   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4655 
4656   for (k=0; k<merge->nrecv; k++){
4657     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4658     nrows = *(buf_ri_k[k]);
4659     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4660     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4661   }
4662 
4663   /* set values of ba */
4664   m = merge->rowmap->n;
4665   for (i=0; i<m; i++) {
4666     arow = owners[rank] + i;
4667     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4668     bnzi = bi[i+1] - bi[i];
4669     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4670 
4671     /* add local non-zero vals of this proc's seqmat into ba */
4672     anzi = ai[arow+1] - ai[arow];
4673     aj   = a->j + ai[arow];
4674     aa   = a->a + ai[arow];
4675     nextaj = 0;
4676     for (j=0; nextaj<anzi; j++){
4677       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4678         ba_i[j] += aa[nextaj++];
4679       }
4680     }
4681 
4682     /* add received vals into ba */
4683     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4684       /* i-th row */
4685       if (i == *nextrow[k]) {
4686         anzi = *(nextai[k]+1) - *nextai[k];
4687         aj   = buf_rj[k] + *(nextai[k]);
4688         aa   = abuf_r[k] + *(nextai[k]);
4689         nextaj = 0;
4690         for (j=0; nextaj<anzi; j++){
4691           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4692             ba_i[j] += aa[nextaj++];
4693           }
4694         }
4695         nextrow[k]++; nextai[k]++;
4696       }
4697     }
4698     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4699   }
4700   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4701   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4702 
4703   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4704   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4705   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4706   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4707   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4708   PetscFunctionReturn(0);
4709 }
4710 
4711 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4712 
4713 #undef __FUNCT__
4714 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4715 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4716 {
4717   PetscErrorCode       ierr;
4718   Mat                  B_mpi;
4719   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4720   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4721   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4722   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4723   PetscInt             len,proc,*dnz,*onz,bs,cbs;
4724   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4725   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4726   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4727   MPI_Status           *status;
4728   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4729   PetscBT              lnkbt;
4730   Mat_Merge_SeqsToMPI  *merge;
4731   PetscContainer       container;
4732 
4733   PetscFunctionBegin;
4734   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4735 
4736   /* make sure it is a PETSc comm */
4737   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4738   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4739   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4740 
4741   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4742   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4743 
4744   /* determine row ownership */
4745   /*---------------------------------------------------------*/
4746   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4747   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4748   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4749   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4750   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4751   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4752   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4753 
4754   m      = merge->rowmap->n;
4755   owners = merge->rowmap->range;
4756 
4757   /* determine the number of messages to send, their lengths */
4758   /*---------------------------------------------------------*/
4759   len_s  = merge->len_s;
4760 
4761   len = 0;  /* length of buf_si[] */
4762   merge->nsend = 0;
4763   for (proc=0; proc<size; proc++){
4764     len_si[proc] = 0;
4765     if (proc == rank){
4766       len_s[proc] = 0;
4767     } else {
4768       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4769       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4770     }
4771     if (len_s[proc]) {
4772       merge->nsend++;
4773       nrows = 0;
4774       for (i=owners[proc]; i<owners[proc+1]; i++){
4775         if (ai[i+1] > ai[i]) nrows++;
4776       }
4777       len_si[proc] = 2*(nrows+1);
4778       len += len_si[proc];
4779     }
4780   }
4781 
4782   /* determine the number and length of messages to receive for ij-structure */
4783   /*-------------------------------------------------------------------------*/
4784   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4785   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4786 
4787   /* post the Irecv of j-structure */
4788   /*-------------------------------*/
4789   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4790   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4791 
4792   /* post the Isend of j-structure */
4793   /*--------------------------------*/
4794   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4795 
4796   for (proc=0, k=0; proc<size; proc++){
4797     if (!len_s[proc]) continue;
4798     i = owners[proc];
4799     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4800     k++;
4801   }
4802 
4803   /* receives and sends of j-structure are complete */
4804   /*------------------------------------------------*/
4805   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4806   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4807 
4808   /* send and recv i-structure */
4809   /*---------------------------*/
4810   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4811   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4812 
4813   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4814   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4815   for (proc=0,k=0; proc<size; proc++){
4816     if (!len_s[proc]) continue;
4817     /* form outgoing message for i-structure:
4818          buf_si[0]:                 nrows to be sent
4819                [1:nrows]:           row index (global)
4820                [nrows+1:2*nrows+1]: i-structure index
4821     */
4822     /*-------------------------------------------*/
4823     nrows = len_si[proc]/2 - 1;
4824     buf_si_i    = buf_si + nrows+1;
4825     buf_si[0]   = nrows;
4826     buf_si_i[0] = 0;
4827     nrows = 0;
4828     for (i=owners[proc]; i<owners[proc+1]; i++){
4829       anzi = ai[i+1] - ai[i];
4830       if (anzi) {
4831         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4832         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4833         nrows++;
4834       }
4835     }
4836     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4837     k++;
4838     buf_si += len_si[proc];
4839   }
4840 
4841   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4842   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4843 
4844   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4845   for (i=0; i<merge->nrecv; i++){
4846     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4847   }
4848 
4849   ierr = PetscFree(len_si);CHKERRQ(ierr);
4850   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4851   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4852   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4853   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4854   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4855   ierr = PetscFree(status);CHKERRQ(ierr);
4856 
4857   /* compute a local seq matrix in each processor */
4858   /*----------------------------------------------*/
4859   /* allocate bi array and free space for accumulating nonzero column info */
4860   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4861   bi[0] = 0;
4862 
4863   /* create and initialize a linked list */
4864   nlnk = N+1;
4865   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4866 
4867   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4868   len  = ai[owners[rank+1]] - ai[owners[rank]];
4869   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4870   current_space = free_space;
4871 
4872   /* determine symbolic info for each local row */
4873   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4874 
4875   for (k=0; k<merge->nrecv; k++){
4876     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4877     nrows = *buf_ri_k[k];
4878     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4879     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4880   }
4881 
4882   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4883   len = 0;
4884   for (i=0;i<m;i++) {
4885     bnzi   = 0;
4886     /* add local non-zero cols of this proc's seqmat into lnk */
4887     arow   = owners[rank] + i;
4888     anzi   = ai[arow+1] - ai[arow];
4889     aj     = a->j + ai[arow];
4890     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4891     bnzi += nlnk;
4892     /* add received col data into lnk */
4893     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4894       if (i == *nextrow[k]) { /* i-th row */
4895         anzi = *(nextai[k]+1) - *nextai[k];
4896         aj   = buf_rj[k] + *nextai[k];
4897         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4898         bnzi += nlnk;
4899         nextrow[k]++; nextai[k]++;
4900       }
4901     }
4902     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4903 
4904     /* if free space is not available, make more free space */
4905     if (current_space->local_remaining<bnzi) {
4906       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4907       nspacedouble++;
4908     }
4909     /* copy data into free space, then initialize lnk */
4910     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4911     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4912 
4913     current_space->array           += bnzi;
4914     current_space->local_used      += bnzi;
4915     current_space->local_remaining -= bnzi;
4916 
4917     bi[i+1] = bi[i] + bnzi;
4918   }
4919 
4920   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4921 
4922   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4923   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4924   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4925 
4926   /* create symbolic parallel matrix B_mpi */
4927   /*---------------------------------------*/
4928   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4929   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4930   if (n==PETSC_DECIDE) {
4931     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4932   } else {
4933     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4934   }
4935   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4936   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4937   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4938   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4939   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4940 
4941   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4942   B_mpi->assembled     = PETSC_FALSE;
4943   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4944   merge->bi            = bi;
4945   merge->bj            = bj;
4946   merge->buf_ri        = buf_ri;
4947   merge->buf_rj        = buf_rj;
4948   merge->coi           = PETSC_NULL;
4949   merge->coj           = PETSC_NULL;
4950   merge->owners_co     = PETSC_NULL;
4951 
4952   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4953 
4954   /* attach the supporting struct to B_mpi for reuse */
4955   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4956   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4957   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4958   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4959   *mpimat = B_mpi;
4960 
4961   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4962   PetscFunctionReturn(0);
4963 }
4964 
4965 #undef __FUNCT__
4966 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4967 /*@C
4968       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential
4969                  matrices from each processor
4970 
4971     Collective on MPI_Comm
4972 
4973    Input Parameters:
4974 +    comm - the communicator the parallel matrix will live on
4975 .    seqmat - the input sequential matrix (each process passes its own)
4976 .    m - number of local rows (or PETSC_DECIDE)
4977 .    n - number of local columns (or PETSC_DECIDE)
4978 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4979 
4980    Output Parameter:
4981 .    mpimat - the parallel matrix generated
4982 
4983     Level: advanced
4984 
4985    Notes:
4986      The dimensions of the sequential matrix in each processor MUST be the same.
4987      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4988      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
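
   A minimal usage sketch (illustrative; seqA is a SeqAIJ matrix of the full global size on each process):
.vb
     Mat mpiA;

     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);CHKERRQ(ierr);
     /* ... update the numerical values of seqA without changing its nonzero pattern ... */
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpiA);CHKERRQ(ierr);
.ve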
4989 @*/
4990 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4991 {
4992   PetscErrorCode   ierr;
4993   PetscMPIInt     size;
4994 
4995   PetscFunctionBegin;
4996   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4997   if (size == 1){
4998      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4999      if (scall == MAT_INITIAL_MATRIX){
5000        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5001      } else {
5002        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5003      }
5004      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5005      PetscFunctionReturn(0);
5006   }
5007   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5008   if (scall == MAT_INITIAL_MATRIX){
5009     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5010   }
5011   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5012   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5013   PetscFunctionReturn(0);
5014 }
5015 
5016 #undef __FUNCT__
5017 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5018 /*@
5019      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5020           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5021           with MatGetSize().
5022 
5023     Not Collective
5024 
5025    Input Parameters:
5026 +    A - the matrix
5027 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5028 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5029    Output Parameter:
5030 .    A_loc - the local sequential matrix generated
5031 
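    A minimal sketch (illustrative; here A_loc created with MAT_INITIAL_MATRIX is destroyed by the caller once no longer needed):
.vb
     Mat A_loc;

     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use the mlocal-by-N SeqAIJ matrix A_loc ... */
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
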
5032     Level: developer
5033 
5034 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
5035 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5036 @*/
5037 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5038 {
5039   PetscErrorCode  ierr;
5040   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
5041   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5042   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5043   MatScalar       *aa=a->a,*ba=b->a,*cam;
5044   PetscScalar     *ca;
5045   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5046   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
5047   PetscBool       match;
5048 
5049   PetscFunctionBegin;
5050   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5051   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5052   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5053   if (scall == MAT_INITIAL_MATRIX){
5054     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5055     ci[0] = 0;
5056     for (i=0; i<am; i++){
5057       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5058     }
5059     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5060     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5061     k = 0;
5062     for (i=0; i<am; i++) {
5063       ncols_o = bi[i+1] - bi[i];
5064       ncols_d = ai[i+1] - ai[i];
5065       /* off-diagonal portion of A */
5066       for (jo=0; jo<ncols_o; jo++) {
5067         col = cmap[*bj];
5068         if (col >= cstart) break;
5069         cj[k]   = col; bj++;
5070         ca[k++] = *ba++;
5071       }
5072       /* diagonal portion of A */
5073       for (j=0; j<ncols_d; j++) {
5074         cj[k]   = cstart + *aj++;
5075         ca[k++] = *aa++;
5076       }
5077       /* off-diagonal portion of A */
5078       for (j=jo; j<ncols_o; j++) {
5079         cj[k]   = cmap[*bj++];
5080         ca[k++] = *ba++;
5081       }
5082     }
5083     /* put together the new matrix */
5084     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5085     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5086     /* Since these are PETSc arrays, change flags to free them as necessary. */
5087     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5088     mat->free_a  = PETSC_TRUE;
5089     mat->free_ij = PETSC_TRUE;
5090     mat->nonew   = 0;
5091   } else if (scall == MAT_REUSE_MATRIX){
5092     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5093     ci = mat->i; cj = mat->j; cam = mat->a;
5094     for (i=0; i<am; i++) {
5095       /* off-diagonal portion of A */
5096       ncols_o = bi[i+1] - bi[i];
5097       for (jo=0; jo<ncols_o; jo++) {
5098         col = cmap[*bj];
5099         if (col >= cstart) break;
5100         *cam++ = *ba++; bj++;
5101       }
5102       /* diagonal portion of A */
5103       ncols_d = ai[i+1] - ai[i];
5104       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5105       /* off-diagonal portion of A */
5106       for (j=jo; j<ncols_o; j++) {
5107         *cam++ = *ba++; bj++;
5108       }
5109     }
5110   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5111   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5112   PetscFunctionReturn(0);
5113 }
5114 
5115 #undef __FUNCT__
5116 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5117 /*@C
5118      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5119 
5120     Not Collective
5121 
5122    Input Parameters:
5123 +    A - the matrix
5124 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5125 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
5126 
5127    Output Parameter:
5128 .    A_loc - the local sequential matrix generated
5129 
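    A minimal sketch (illustrative; passing PETSC_NULL for row and col extracts all local rows and all nonzero columns):
.vb
     Mat A_loc;

     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve
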
5130     Level: developer
5131 
5132 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5133 
5134 @*/
5135 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5136 {
5137   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5138   PetscErrorCode    ierr;
5139   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5140   IS                isrowa,iscola;
5141   Mat               *aloc;
5142   PetscBool       match;
5143 
5144   PetscFunctionBegin;
5145   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5146   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5147   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5148   if (!row){
5149     start = A->rmap->rstart; end = A->rmap->rend;
5150     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5151   } else {
5152     isrowa = *row;
5153   }
5154   if (!col){
5155     start = A->cmap->rstart;
5156     cmap  = a->garray;
5157     nzA   = a->A->cmap->n;
5158     nzB   = a->B->cmap->n;
5159     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5160     ncols = 0;
5161     for (i=0; i<nzB; i++) {
5162       if (cmap[i] < start) idx[ncols++] = cmap[i];
5163       else break;
5164     }
5165     imark = i;
5166     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5167     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5168     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5169   } else {
5170     iscola = *col;
5171   }
5172   if (scall != MAT_INITIAL_MATRIX){
5173     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5174     aloc[0] = *A_loc;
5175   }
5176   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5177   *A_loc = aloc[0];
5178   ierr = PetscFree(aloc);CHKERRQ(ierr);
5179   if (!row){
5180     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5181   }
5182   if (!col){
5183     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5184   }
5185   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5186   PetscFunctionReturn(0);
5187 }
5188 
5189 #undef __FUNCT__
5190 #define __FUNCT__ "MatGetBrowsOfAcols"
5191 /*@C
5192     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5193 
5194     Collective on Mat
5195 
5196    Input Parameters:
5197 +    A,B - the matrices in mpiaij format
5198 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5199 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5200 
5201    Output Parameter:
5202 +    rowb, colb - index sets of rows and columns of B to extract
5203 -    B_seq - the sequential matrix generated
5204 
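    A minimal sketch (illustrative; the index sets returned by the first call are reused in the second):
.vb
     IS  rowb = PETSC_NULL,colb = PETSC_NULL;
     Mat B_seq;

     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     /* ... later, after the values (but not the nonzero pattern) of B change ... */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
.ve
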
5205     Level: developer
5206 
5207 @*/
5208 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5209 {
5210   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5211   PetscErrorCode    ierr;
5212   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5213   IS                isrowb,iscolb;
5214   Mat               *bseq=PETSC_NULL;
5215 
5216   PetscFunctionBegin;
5217   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5218     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5219   }
5220   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5221 
5222   if (scall == MAT_INITIAL_MATRIX){
5223     start = A->cmap->rstart;
5224     cmap  = a->garray;
5225     nzA   = a->A->cmap->n;
5226     nzB   = a->B->cmap->n;
5227     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5228     ncols = 0;
5229     for (i=0; i<nzB; i++) {  /* row < local row index */
5230       if (cmap[i] < start) idx[ncols++] = cmap[i];
5231       else break;
5232     }
5233     imark = i;
5234     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5235     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5236     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5237     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5238   } else {
5239     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5240     isrowb = *rowb; iscolb = *colb;
5241     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5242     bseq[0] = *B_seq;
5243   }
5244   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5245   *B_seq = bseq[0];
5246   ierr = PetscFree(bseq);CHKERRQ(ierr);
5247   if (!rowb){
5248     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5249   } else {
5250     *rowb = isrowb;
5251   }
5252   if (!colb){
5253     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5254   } else {
5255     *colb = iscolb;
5256   }
5257   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5258   PetscFunctionReturn(0);
5259 }
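
/*
   Sample usage of MatGetBrowsOfAcols() (a sketch; the helper name is hypothetical, error
   handling is reduced to CHKERRQ, and A and B are assumed to be assembled MPIAIJ matrices
   whose layouts satisfy the compatibility check above):

     PetscErrorCode BrowsOfAcolsExample(Mat A,Mat B)
     {
       PetscErrorCode ierr;
       IS             rowb = PETSC_NULL,colb = PETSC_NULL;
       Mat            Bseq = PETSC_NULL;

       PetscFunctionBegin;
       /* the first call creates the index sets and the sequential matrix */
       ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
       /* ... use Bseq ... */
       /* once B's values change (same nonzero pattern), reuse the saved index sets and matrix */
       ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
       ierr = ISDestroy(&rowb);CHKERRQ(ierr);
       ierr = ISDestroy(&colb);CHKERRQ(ierr);
       ierr = MatDestroy(&Bseq);CHKERRQ(ierr);
       PetscFunctionReturn(0);
     }
*/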
5260 
5261 #undef __FUNCT__
5262 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5263 /*
5264     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5265     of the OFF-DIAGONAL portion of the local part of A
5266 
5267     Collective on Mat
5268 
5269    Input Parameters:
5270 +    A,B - the matrices in mpiaij format
5271 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5272 
5273    Output Parameter:
5274 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5275 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5276 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5277 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5278 
5279     Level: developer
5280 
5281 */
5282 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5283 {
5284   VecScatter_MPI_General *gen_to,*gen_from;
5285   PetscErrorCode         ierr;
5286   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5287   Mat_SeqAIJ             *b_oth;
5288   VecScatter             ctx=a->Mvctx;
5289   MPI_Comm               comm=((PetscObject)ctx)->comm;
5290   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5291   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5292   PetscScalar            *rvalues,*svalues;
5293   MatScalar              *b_otha,*bufa,*bufA;
5294   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5295   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5296   MPI_Status             *sstatus,rstatus;
5297   PetscMPIInt            jj;
5298   PetscInt               *cols,sbs,rbs;
5299   PetscScalar            *vals;
5300 
5301   PetscFunctionBegin;
5302   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5303     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5304   }
5305   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5306   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5307 
5308   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5309   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5310   rvalues  = gen_from->values; /* holds the length of receiving row */
5311   svalues  = gen_to->values;   /* holds the length of sending row */
5312   nrecvs   = gen_from->n;
5313   nsends   = gen_to->n;
5314 
5315   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5316   srow     = gen_to->indices;   /* local row index to be sent */
5317   sstarts  = gen_to->starts;
5318   sprocs   = gen_to->procs;
5319   sstatus  = gen_to->sstatus;
5320   sbs      = gen_to->bs;
5321   rstarts  = gen_from->starts;
5322   rprocs   = gen_from->procs;
5323   rbs      = gen_from->bs;
5324 
5325   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5326   if (scall == MAT_INITIAL_MATRIX){
5327     /* i-array */
5328     /*---------*/
5329     /*  post receives */
5330     for (i=0; i<nrecvs; i++){
5331       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5332       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5333       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5334     }
5335 
5336     /* pack the outgoing message */
5337     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5338     sstartsj[0] = 0;  rstartsj[0] = 0;
5339     len = 0; /* total length of j or a array to be sent */
5340     k = 0;
5341     for (i=0; i<nsends; i++){
5342       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5343       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5344       for (j=0; j<nrows; j++) {
5345         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5346         for (l=0; l<sbs; l++){
5347           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5348           rowlen[j*sbs+l] = ncols;
5349           len += ncols;
5350           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5351         }
5352         k++;
5353       }
5354       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5355       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5356     }
5357     /* recvs and sends of i-array are completed */
5358     i = nrecvs;
5359     while (i--) {
5360       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5361     }
5362     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5363 
5364     /* allocate buffers for sending j and a arrays */
5365     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5366     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5367 
5368     /* create i-array of B_oth */
5369     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5370     b_othi[0] = 0;
5371     len = 0; /* total length of j or a array to be received */
5372     k = 0;
5373     for (i=0; i<nrecvs; i++){
5374       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5375       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5376       for (j=0; j<nrows; j++) {
5377         b_othi[k+1] = b_othi[k] + rowlen[j];
5378         len += rowlen[j]; k++;
5379       }
5380       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5381     }
5382 
5383     /* allocate space for j and a arrays of B_oth */
5384     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5385     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5386 
5387     /* j-array */
5388     /*---------*/
5389     /*  post receives of j-array */
5390     for (i=0; i<nrecvs; i++){
5391       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5392       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5393     }
5394 
5395     /* pack the outgoing message j-array */
5396     k = 0;
5397     for (i=0; i<nsends; i++){
5398       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5399       bufJ = bufj+sstartsj[i];
5400       for (j=0; j<nrows; j++) {
5401         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5402         for (ll=0; ll<sbs; ll++){
5403           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5404           for (l=0; l<ncols; l++){
5405             *bufJ++ = cols[l];
5406           }
5407           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5408         }
5409       }
5410       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5411     }
5412 
5413     /* recvs and sends of j-array are completed */
5414     i = nrecvs;
5415     while (i--) {
5416       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5417     }
5418     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5419   } else if (scall == MAT_REUSE_MATRIX){
5420     sstartsj = *startsj_s;
5421     rstartsj = *startsj_r;
5422     bufa     = *bufa_ptr;
5423     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5424     b_otha   = b_oth->a;
5425   } else {
5426     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatReuse argument must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5427   }
5428 
5429   /* a-array */
5430   /*---------*/
5431   /*  post receives of a-array */
5432   for (i=0; i<nrecvs; i++){
5433     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5434     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5435   }
5436 
5437   /* pack the outgoing message a-array */
5438   k = 0;
5439   for (i=0; i<nsends; i++){
5440     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5441     bufA = bufa+sstartsj[i];
5442     for (j=0; j<nrows; j++) {
5443       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5444       for (ll=0; ll<sbs; ll++){
5445         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5446         for (l=0; l<ncols; l++){
5447           *bufA++ = vals[l];
5448         }
5449         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5450       }
5451     }
5452     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5453   }
5454   /* recvs and sends of a-array are completed */
5455   i = nrecvs;
5456   while (i--) {
5457     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5458   }
5459   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5460   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5461 
5462   if (scall == MAT_INITIAL_MATRIX){
5463     /* put together the new matrix */
5464     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5465 
5466     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5467     /* Since these are PETSc arrays, change flags to free them as necessary. */
5468     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5469     b_oth->free_a  = PETSC_TRUE;
5470     b_oth->free_ij = PETSC_TRUE;
5471     b_oth->nonew   = 0;
5472 
5473     ierr = PetscFree(bufj);CHKERRQ(ierr);
5474     if (!startsj_s || !bufa_ptr){
5475       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5476       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5477     } else {
5478       *startsj_s = sstartsj;
5479       *startsj_r = rstartsj;
5480       *bufa_ptr  = bufa;
5481     }
5482   }
5483   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5484   PetscFunctionReturn(0);
5485 }
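
/*
   Sample usage of MatGetBrowsOfAoCols_MPIAIJ() (a sketch in the style of the parallel
   matrix-matrix multiply kernels that call it; the helper name is hypothetical, and the
   cleanup reflects the PetscMalloc2/PetscMalloc allocations of the saved buffers above):

     PetscErrorCode BrowsOfAoColsExample(Mat A,Mat B)
     {
       PetscErrorCode ierr;
       PetscInt       *startsj_s = PETSC_NULL,*startsj_r = PETSC_NULL;
       MatScalar      *bufa = PETSC_NULL;
       Mat            B_oth = PETSC_NULL;

       PetscFunctionBegin;
       /* first call sets up the communication, creates B_oth, and saves the j-offsets and value buffer */
       ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
       /* ... use B_oth ... */
       /* when only B's numerical values change, redo just the a-array exchange */
       ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
       ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
       ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
       ierr = PetscFree(bufa);CHKERRQ(ierr);
       PetscFunctionReturn(0);
     }
*/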
5486 
5487 #undef __FUNCT__
5488 #define __FUNCT__ "MatGetCommunicationStructs"
5489 /*@C
5490   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5491 
5492   Not Collective
5493 
5494   Input Parameters:
5495 . A - The matrix in mpiaij format
5496 
5497   Output Parameter:
5498 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5499 . colmap - A map from global column index to local index into lvec
5500 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5501 
5502   Level: developer
5503 
5504 @*/
5505 #if defined (PETSC_USE_CTABLE)
5506 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5507 #else
5508 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5509 #endif
5510 {
5511   Mat_MPIAIJ *a;
5512 
5513   PetscFunctionBegin;
5514   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5515   PetscValidPointer(lvec, 2);
5516   PetscValidPointer(colmap, 3);
5517   PetscValidPointer(multScatter, 4);
5518   a = (Mat_MPIAIJ *) A->data;
5519   if (lvec) *lvec = a->lvec;
5520   if (colmap) *colmap = a->colmap;
5521   if (multScatter) *multScatter = a->Mvctx;
5522   PetscFunctionReturn(0);
5523 }
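
/*
   Sample usage of MatGetCommunicationStructs() (a fragment sketch; A, x, and ierr are assumed
   to be declared in the caller, x being a global vector compatible with A's column layout;
   the returned objects are owned by the matrix and must not be destroyed by the caller):

     Vec        lvec;
     VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
#else
     PetscInt   *colmap;
#endif

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
     /* gather the off-process entries of x needed by this process, as MatMult() does */
     ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
*/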
5524 
5525 EXTERN_C_BEGIN
5526 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5527 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5528 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5529 EXTERN_C_END
5530 
5531 #undef __FUNCT__
5532 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5533 /*
5534     Computes C = A*B as (B'*A')', since computing the MPIDense*MPIAIJ product A*B directly is untenable
5535 
5536                n                       p                          p
5537         (              )       (              )         (                  )
5538       m (      A       )  *  n (       B      )   =   m (         C        )
5539         (              )       (              )         (                  )
5540 
5541 */
5542 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5543 {
5544   PetscErrorCode     ierr;
5545   Mat                At,Bt,Ct;
5546 
5547   PetscFunctionBegin;
5548   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5549   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5550   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5551   ierr = MatDestroy(&At);CHKERRQ(ierr);
5552   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5553   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5554   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5555   PetscFunctionReturn(0);
5556 }
5557 
5558 #undef __FUNCT__
5559 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5560 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5561 {
5562   PetscErrorCode ierr;
5563   PetscInt       m=A->rmap->n,n=B->cmap->n;
5564   Mat            Cmat;
5565 
5566   PetscFunctionBegin;
5567   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5568   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5569   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5570   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5571   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5572   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5573   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5574   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5575 
5576   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5577   *C = Cmat;
5578   PetscFunctionReturn(0);
5579 }
5580 
5581 /* ----------------------------------------------------------------*/
5582 #undef __FUNCT__
5583 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5584 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5585 {
5586   PetscErrorCode ierr;
5587 
5588   PetscFunctionBegin;
5589   if (scall == MAT_INITIAL_MATRIX){
5590     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5591   }
5592   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5593   PetscFunctionReturn(0);
5594 }
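
/*
   The routines above are reached through the public interface; a fragment sketch, assuming
   ierr is declared, A is a MATMPIDENSE matrix, and B is a MATMPIAIJ matrix with
   A->cmap->n == B->rmap->n:

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);  /* symbolic + numeric */
     /* ... after changing the values (not the pattern) of A or B ... */
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);    /* numeric only */
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/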
5595 
5596 EXTERN_C_BEGIN
5597 #if defined(PETSC_HAVE_MUMPS)
5598 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5599 #endif
5600 #if defined(PETSC_HAVE_PASTIX)
5601 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5602 #endif
5603 #if defined(PETSC_HAVE_SUPERLU_DIST)
5604 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5605 #endif
5606 #if defined(PETSC_HAVE_CLIQUE)
5607 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5608 #endif
5609 EXTERN_C_END
5610 
5611 /*MC
5612    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5613 
5614    Options Database Keys:
5615 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5616 
5617   Level: beginner
5618 
5619 .seealso: MatCreateAIJ()
5620 M*/
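
/*
   A sketch of selecting this type explicitly (fragment; ierr, m, and n are assumed to be
   declared, and the preallocation numbers are placeholders; alternatively call
   MatSetFromOptions() and use -mat_type mpiaij):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/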
5621 
5622 EXTERN_C_BEGIN
5623 #undef __FUNCT__
5624 #define __FUNCT__ "MatCreate_MPIAIJ"
5625 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5626 {
5627   Mat_MPIAIJ     *b;
5628   PetscErrorCode ierr;
5629   PetscMPIInt    size;
5630 
5631   PetscFunctionBegin;
5632   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5633   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5634   B->data         = (void*)b;
5635   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5636   B->assembled    = PETSC_FALSE;
5637   B->insertmode   = NOT_SET_VALUES;
5638   b->size         = size;
5639   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5640 
5641   /* build cache for off array entries formed */
5642   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5643   b->donotstash  = PETSC_FALSE;
5644   b->colmap      = 0;
5645   b->garray      = 0;
5646   b->roworiented = PETSC_TRUE;
5647 
5648   /* stuff used for matrix vector multiply */
5649   b->lvec      = PETSC_NULL;
5650   b->Mvctx     = PETSC_NULL;
5651 
5652   /* stuff for MatGetRow() */
5653   b->rowindices   = 0;
5654   b->rowvalues    = 0;
5655   b->getrowactive = PETSC_FALSE;
5656 
5657   /* flexible pointer used in CUSP/CUSPARSE classes */
5658   b->spptr        = PETSC_NULL;
5659 
5660 #if defined(PETSC_HAVE_MUMPS)
5661   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5662                                      "MatGetFactor_aij_mumps",
5663                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5664 #endif
5665 #if defined(PETSC_HAVE_PASTIX)
5666   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5667 					   "MatGetFactor_mpiaij_pastix",
5668 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5669 #endif
5670 #if defined(PETSC_HAVE_SUPERLU_DIST)
5671   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5672                                      "MatGetFactor_mpiaij_superlu_dist",
5673                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5674 #endif
5675 #if defined(PETSC_HAVE_CLIQUE)
5676   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C",
5677                                      "MatGetFactor_aij_clique",
5678                                      MatGetFactor_aij_clique);CHKERRQ(ierr);
5679 #endif
5680   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5681                                      "MatStoreValues_MPIAIJ",
5682                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5683   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5684                                      "MatRetrieveValues_MPIAIJ",
5685                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5686   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5687 				     "MatGetDiagonalBlock_MPIAIJ",
5688                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5689   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5690 				     "MatIsTranspose_MPIAIJ",
5691 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5692   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5693 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5694 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5695   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5696 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5697 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5698   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5699 				     "MatDiagonalScaleLocal_MPIAIJ",
5700 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5701   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5702                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5703                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5704   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5705                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5706                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5707   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5708                                      "MatConvert_MPIAIJ_MPISBAIJ",
5709                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5710   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5711                                      "MatMatMult_MPIDense_MPIAIJ",
5712                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5713   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5714                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5715                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5716   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5717                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5718                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5719   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5720   PetscFunctionReturn(0);
5721 }
5722 EXTERN_C_END
5723 
5724 #undef __FUNCT__
5725 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5726 /*@
5727      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5728          and "off-diagonal" part of the matrix in CSR format.
5729 
5730    Collective on MPI_Comm
5731 
5732    Input Parameters:
5733 +  comm - MPI communicator
5734 .  m - number of local rows (Cannot be PETSC_DECIDE)
5735 .  n - number of local columns; this should be the same as the local size used in creating the
5736        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5737        calculated if N is given). For square matrices n is almost always m.
5738 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5739 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5740 .   i - row indices for "diagonal" portion of matrix
5741 .   j - column indices (local to the "diagonal" block, i.e. numbered from the start column of the diagonal portion)
5742 .   a - matrix values
5743 .   oi - row indices for "off-diagonal" portion of matrix
5744 .   oj - column indices (in global numbering)
5745 -   oa - matrix values
5746 
5747    Output Parameter:
5748 .   mat - the matrix
5749 
5750    Level: advanced
5751 
5752    Notes:
5753        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5754        must free the arrays once the matrix has been destroyed and not before.
5755 
5756        The i and j indices are 0 based
5757 
5758        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5759 
5760        This sets local rows and cannot be used to set off-processor values.
5761 
5762        You cannot later use MatSetValues() to change values in this matrix.
5763 
5764 .keywords: matrix, aij, compressed row, sparse, parallel
5765 
5766 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5767           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5768 @*/
5769 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5770 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5771 {
5772   PetscErrorCode ierr;
5773   Mat_MPIAIJ     *maij;
5774 
5775   PetscFunctionBegin;
5776   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5777   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5778   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5779   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5780   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5781   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5782   maij = (Mat_MPIAIJ*) (*mat)->data;
5783   maij->donotstash     = PETSC_TRUE;
5784   (*mat)->preallocated = PETSC_TRUE;
5785 
5786   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5787   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5788 
5789   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5790   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5791 
5792   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5793   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5794   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5795   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5796 
5797   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5798   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5799   PetscFunctionReturn(0);
5800 }
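
/*
   Sample usage of MatCreateMPIAIJWithSplitArrays() (a fragment sketch on two processes
   building the 2x2 matrix [2 -1; -1 2]; ierr is assumed to be declared; each process owns
   one row and one column, the "diagonal" column indices are local to the diagonal block
   while the "off-diagonal" column indices are global, and the arrays must stay valid until
   the matrix is destroyed since they are not copied):

     PetscMPIInt rank;
     PetscInt    i[2]  = {0,1},j[1]  = {0};   /* one entry in the local row of the diagonal block */
     PetscInt    oi[2] = {0,1},oj[1];         /* one entry in the local row of the off-diagonal block */
     PetscScalar a[1]  = {2.0},oa[1] = {-1.0};
     Mat         A;

     ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     oj[0] = rank ? 0 : 1;                    /* the global column owned by the other process */
     ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/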
5801 
5802 /*
5803     Special version for direct calls from Fortran
5804 */
5805 #include <petsc-private/fortranimpl.h>
5806 
5807 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5808 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5809 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5810 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5811 #endif
5812 
5813 /* Change these macros so they can be used in a void function */
5814 #undef CHKERRQ
5815 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5816 #undef SETERRQ2
5817 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5818 #undef SETERRQ3
5819 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5820 #undef SETERRQ
5821 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5822 
5823 EXTERN_C_BEGIN
5824 #undef __FUNCT__
5825 #define __FUNCT__ "matsetvaluesmpiaij_"
5826 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5827 {
5828   Mat             mat = *mmat;
5829   PetscInt        m = *mm, n = *mn;
5830   InsertMode      addv = *maddv;
5831   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5832   PetscScalar     value;
5833   PetscErrorCode  ierr;
5834 
5835   MatCheckPreallocated(mat,1);
5836   if (mat->insertmode == NOT_SET_VALUES) {
5837     mat->insertmode = addv;
5838   }
5839 #if defined(PETSC_USE_DEBUG)
5840   else if (mat->insertmode != addv) {
5841     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5842   }
5843 #endif
5844   {
5845   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5846   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5847   PetscBool       roworiented = aij->roworiented;
5848 
5849   /* Some Variables required in the macro */
5850   Mat             A = aij->A;
5851   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5852   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5853   MatScalar       *aa = a->a;
5854   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5855   Mat             B = aij->B;
5856   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5857   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5858   MatScalar       *ba = b->a;
5859 
5860   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5861   PetscInt        nonew = a->nonew;
5862   MatScalar       *ap1,*ap2;
5863 
5864   PetscFunctionBegin;
5865   for (i=0; i<m; i++) {
5866     if (im[i] < 0) continue;
5867 #if defined(PETSC_USE_DEBUG)
5868     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5869 #endif
5870     if (im[i] >= rstart && im[i] < rend) {
5871       row      = im[i] - rstart;
5872       lastcol1 = -1;
5873       rp1      = aj + ai[row];
5874       ap1      = aa + ai[row];
5875       rmax1    = aimax[row];
5876       nrow1    = ailen[row];
5877       low1     = 0;
5878       high1    = nrow1;
5879       lastcol2 = -1;
5880       rp2      = bj + bi[row];
5881       ap2      = ba + bi[row];
5882       rmax2    = bimax[row];
5883       nrow2    = bilen[row];
5884       low2     = 0;
5885       high2    = nrow2;
5886 
5887       for (j=0; j<n; j++) {
5888         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5889         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5890         if (in[j] >= cstart && in[j] < cend){
5891           col = in[j] - cstart;
5892           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5893         } else if (in[j] < 0) continue;
5894 #if defined(PETSC_USE_DEBUG)
5895         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5896 #endif
5897         else {
5898           if (mat->was_assembled) {
5899             if (!aij->colmap) {
5900               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5901             }
5902 #if defined (PETSC_USE_CTABLE)
5903             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5904 	    col--;
5905 #else
5906             col = aij->colmap[in[j]] - 1;
5907 #endif
5908             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5909               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5910               col =  in[j];
5911               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5912               B = aij->B;
5913               b = (Mat_SeqAIJ*)B->data;
5914               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5915               rp2      = bj + bi[row];
5916               ap2      = ba + bi[row];
5917               rmax2    = bimax[row];
5918               nrow2    = bilen[row];
5919               low2     = 0;
5920               high2    = nrow2;
5921               bm       = aij->B->rmap->n;
5922               ba = b->a;
5923             }
5924           } else col = in[j];
5925           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5926         }
5927       }
5928     } else {
5929       if (!aij->donotstash) {
5930         if (roworiented) {
5931           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5932         } else {
5933           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5934         }
5935       }
5936     }
5937   }}
5938   PetscFunctionReturnVoid();
5939 }
5940 EXTERN_C_END
5941 
5942