1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise.  As a result, for single process communicators,
13   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
14   for communicators controlling multiple processes.  It is recommended that you call both of
15   the above preallocation routines for simplicity.
16 
17    Options Database Keys:
18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19 
20   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The type also automatically switches over to using inodes when
21    enough of them exist.
22 
23   Level: beginner
24 
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
26 M*/
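
/*
   A minimal usage sketch of the advice above (an illustrative example, not part of the manual page;
   the sizes 10 rows and 5/2 nonzeros per row are made up):

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,10,10);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     preallocate for both the one-process and the many-process case:
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
     ... fill the matrix with MatSetValues() ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/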
27 
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30 
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34   for communicators controlling multiple processes.  It is recommended that you call both of
35   the above preallocation routines for simplicity.
36 
37    Options Database Keys:
38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39 
40   Level: beginner
41 
42 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
44 
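/*
   Builds an index set of the global numbers of the locally owned rows that contain at least one
   stored nonzero value; rows with no stored entries, or whose stored entries are all zero, are
   omitted. If no process has such an all-zero row, *keptrows is returned as NULL.
*/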
45 #undef __FUNCT__
46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
48 {
49   PetscErrorCode  ierr;
50   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
51   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
52   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
53   const PetscInt  *ia,*ib;
54   const MatScalar *aa,*bb;
55   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
56   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
57 
58   PetscFunctionBegin;
59   *keptrows = 0;
60   ia        = a->i;
61   ib        = b->i;
62   for (i=0; i<m; i++) {
63     na = ia[i+1] - ia[i];
64     nb = ib[i+1] - ib[i];
65     if (!na && !nb) {
66       cnt++;
67       goto ok1;
68     }
69     aa = a->a + ia[i];
70     for (j=0; j<na; j++) {
71       if (aa[j] != 0.0) goto ok1;
72     }
73     bb = b->a + ib[i];
74     for (j=0; j <nb; j++) {
75       if (bb[j] != 0.0) goto ok1;
76     }
77     cnt++;
78 ok1:;
79   }
80   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
81   if (!n0rows) PetscFunctionReturn(0);
82   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
83   cnt  = 0;
84   for (i=0; i<m; i++) {
85     na = ia[i+1] - ia[i];
86     nb = ib[i+1] - ib[i];
87     if (!na && !nb) continue;
88     aa = a->a + ia[i];
89     for (j=0; j<na;j++) {
90       if (aa[j] != 0.0) {
91         rows[cnt++] = rstart + i;
92         goto ok2;
93       }
94     }
95     bb = b->a + ib[i];
96     for (j=0; j<nb; j++) {
97       if (bb[j] != 0.0) {
98         rows[cnt++] = rstart + i;
99         goto ok2;
100       }
101     }
102 ok2:;
103   }
104   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
110 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
111 {
112   PetscErrorCode    ierr;
113   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
114 
115   PetscFunctionBegin;
116   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
117     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
118   } else {
119     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
128 {
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
130   PetscErrorCode ierr;
131   PetscInt       i,rstart,nrows,*rows;
132 
133   PetscFunctionBegin;
134   *zrows = NULL;
135   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
136   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
137   for (i=0; i<nrows; i++) rows[i] += rstart;
138   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
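/*
   Computes the requested norm of every global column: each process accumulates the contributions of
   its diagonal (A) and off-diagonal (B) blocks into a work array of global column length, the arrays
   are combined with an Allreduce (MPI_MAX for NORM_INFINITY, MPI_SUM otherwise), and for NORM_2 the
   square root is taken at the end. Every process receives the full result in norms[].
*/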
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
145 {
146   PetscErrorCode ierr;
147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
148   PetscInt       i,n,*garray = aij->garray;
149   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
150   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
151   PetscReal      *work;
152 
153   PetscFunctionBegin;
154   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
155   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
156   if (type == NORM_2) {
157     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
158       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
159     }
160     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
161       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
162     }
163   } else if (type == NORM_1) {
164     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
165       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
166     }
167     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
168       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
169     }
170   } else if (type == NORM_INFINITY) {
171     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
172       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
173     }
174     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
175       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
176     }
177 
178   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
179   if (type == NORM_INFINITY) {
180     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
181   } else {
182     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
183   }
184   ierr = PetscFree(work);CHKERRQ(ierr);
185   if (type == NORM_2) {
186     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
194 {
195   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
196   IS              sis,gis;
197   PetscErrorCode  ierr;
198   const PetscInt  *isis,*igis;
199   PetscInt        n,*iis,nsis,ngis,rstart,i;
200 
201   PetscFunctionBegin;
202   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
203   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
204   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
205   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
206   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
207   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
208 
209   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
210   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
211   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
212   n    = ngis + nsis;
213   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
214   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
215   for (i=0; i<n; i++) iis[i] += rstart;
216   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
217 
218   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
219   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
220   ierr = ISDestroy(&sis);CHKERRQ(ierr);
221   ierr = ISDestroy(&gis);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
225 #undef __FUNCT__
226 #define __FUNCT__ "MatDistribute_MPIAIJ"
227 /*
228     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
229     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
230 
231     Only for square matrices
232 
233     Used by a preconditioner, hence PETSC_EXTERN
234 */
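/*
   A rough calling sketch (illustrative only, not taken from existing PETSc code): gmat holds the
   assembled sequential matrix on process 0, every process must still pass a valid matrix since
   MatGetBlockSizes() is called on it everywhere, and m is the number of rows this process is to own
   in the distributed copy:

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     ... later, after the numerical values of gmat on process 0 have changed ...
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/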
235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
236 {
237   PetscMPIInt    rank,size;
238   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
239   PetscErrorCode ierr;
240   Mat            mat;
241   Mat_SeqAIJ     *gmata;
242   PetscMPIInt    tag;
243   MPI_Status     status;
244   PetscBool      aij;
245   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
246 
247   PetscFunctionBegin;
248   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
249   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
250   if (!rank) {
251     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
252     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
253   }
254   if (reuse == MAT_INITIAL_MATRIX) {
255     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
256     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
257     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
258     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
259     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
260     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
261     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
262     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
263     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
264 
265     rowners[0] = 0;
266     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
267     rstart = rowners[rank];
268     rend   = rowners[rank+1];
269     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
270     if (!rank) {
271       gmata = (Mat_SeqAIJ*) gmat->data;
272       /* send row lengths to all processors */
273       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
274       for (i=1; i<size; i++) {
275         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
276       }
277       /* determine the number of diagonal and off-diagonal entries in each row */
278       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
279       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
280       jj   = 0;
281       for (i=0; i<m; i++) {
282         for (j=0; j<dlens[i]; j++) {
283           if (gmata->j[jj] < rstart) ld[i]++;
284           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
285           jj++;
286         }
287       }
288       /* send column indices to other processes */
289       for (i=1; i<size; i++) {
290         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
291         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
292         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293       }
294 
295       /* send numerical values to other processes */
296       for (i=1; i<size; i++) {
297         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
298         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
299       }
300       gmataa = gmata->a;
301       gmataj = gmata->j;
302 
303     } else {
304       /* receive row lengths */
305       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
306       /* receive column indices */
307       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
308       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
309       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* determine the number of diagonal and off-diagonal entries in each row */
311       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
312       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
313       jj   = 0;
314       for (i=0; i<m; i++) {
315         for (j=0; j<dlens[i]; j++) {
316           if (gmataj[jj] < rstart) ld[i]++;
317           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
318           jj++;
319         }
320       }
321       /* receive numerical values */
322       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* set preallocation */
326     for (i=0; i<m; i++) {
327       dlens[i] -= olens[i];
328     }
329     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
330     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
331 
332     for (i=0; i<m; i++) {
333       dlens[i] += olens[i];
334     }
335     cnt = 0;
336     for (i=0; i<m; i++) {
337       row  = rstart + i;
338       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
339       cnt += dlens[i];
340     }
341     if (rank) {
342       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
343     }
344     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
345     ierr = PetscFree(rowners);CHKERRQ(ierr);
346 
347     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
348 
349     *inmat = mat;
350   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
351     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
352     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
353     mat  = *inmat;
354     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
355     if (!rank) {
356       /* send numerical values to other processes */
357       gmata  = (Mat_SeqAIJ*) gmat->data;
358       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
359       gmataa = gmata->a;
360       for (i=1; i<size; i++) {
361         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
362         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
363       }
364       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
365     } else {
366       /* receive numerical values from process 0 */
367       nz   = Ad->nz + Ao->nz;
368       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
369       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
370     }
371     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
372     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
373     ad = Ad->a;
374     ao = Ao->a;
375     if (mat->rmap->n) {
376       i  = 0;
377       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
378       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
379     }
380     for (i=1; i<mat->rmap->n; i++) {
381       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     i--;
385     if (mat->rmap->n) {
386       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
387     }
388     if (rank) {
389       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
390     }
391   }
392   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
393   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 /*
398   Local utility routine that creates a mapping from the global column
399 number to the local number in the off-diagonal part of the local
400 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
401 a slightly higher hash table cost; without it, it is not scalable (each processor
402 has an order N integer array, but the array is fast to access).
403 */
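/*
   A small made-up example of the mapping built here: if the off-diagonal block B has three local
   columns whose global numbers are garray[] = {3, 9, 17}, then after this call (1-based values,
   so that 0 can mean "not present")
       colmap[3]  == 1   global column 3  is local column 0 of B
       colmap[9]  == 2   global column 9  is local column 1 of B
       colmap[17] == 3   global column 17 is local column 2 of B
   With PETSC_USE_CTABLE the same information is kept in a hash table keyed by global column + 1
   rather than in a length mat->cmap->N array.
*/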
404 #undef __FUNCT__
405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
407 {
408   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscInt       n = aij->B->cmap->n,i;
411 
412   PetscFunctionBegin;
413   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
414 #if defined(PETSC_USE_CTABLE)
415   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
416   for (i=0; i<n; i++) {
417     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
418   }
419 #else
420   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
421   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
422   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
423 #endif
424   PetscFunctionReturn(0);
425 }
426 
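/*
   The two macros below insert or add a single value at (row,col) of the diagonal block A or the
   off-diagonal block B: they binary-search the sorted column indices of the row, update the value if
   the column is already present, and otherwise grow the row (reallocating when necessary) and shift
   the later entries up to make room for the new nonzero. They use the local variables set up in
   MatSetValues_MPIAIJ() below.
*/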
427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
428 { \
429     if (col <= lastcol1)  low1 = 0;     \
430     else                 high1 = nrow1; \
431     lastcol1 = col;\
432     while (high1-low1 > 5) { \
433       t = (low1+high1)/2; \
434       if (rp1[t] > col) high1 = t; \
435       else              low1  = t; \
436     } \
437       for (_i=low1; _i<high1; _i++) { \
438         if (rp1[_i] > col) break; \
439         if (rp1[_i] == col) { \
440           if (addv == ADD_VALUES) ap1[_i] += value;   \
441           else                    ap1[_i] = value; \
442           goto a_noinsert; \
443         } \
444       }  \
445       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
446       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
447       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
448       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
449       N = nrow1++ - 1; a->nz++; high1++; \
450       /* shift up all the later entries in this row */ \
451       for (ii=N; ii>=_i; ii--) { \
452         rp1[ii+1] = rp1[ii]; \
453         ap1[ii+1] = ap1[ii]; \
454       } \
455       rp1[_i] = col;  \
456       ap1[_i] = value;  \
457       A->nonzerostate++;\
458       a_noinsert: ; \
459       ailen[row] = nrow1; \
460 }
461 
462 
463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
464   { \
465     if (col <= lastcol2) low2 = 0;                        \
466     else high2 = nrow2;                                   \
467     lastcol2 = col;                                       \
468     while (high2-low2 > 5) {                              \
469       t = (low2+high2)/2;                                 \
470       if (rp2[t] > col) high2 = t;                        \
471       else             low2  = t;                         \
472     }                                                     \
473     for (_i=low2; _i<high2; _i++) {                       \
474       if (rp2[_i] > col) break;                           \
475       if (rp2[_i] == col) {                               \
476         if (addv == ADD_VALUES) ap2[_i] += value;         \
477         else                    ap2[_i] = value;          \
478         goto b_noinsert;                                  \
479       }                                                   \
480     }                                                     \
481     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
482     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
483     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
484     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
485     N = nrow2++ - 1; b->nz++; high2++;                    \
486     /* shift up all the later entries in this row */      \
487     for (ii=N; ii>=_i; ii--) {                            \
488       rp2[ii+1] = rp2[ii];                                \
489       ap2[ii+1] = ap2[ii];                                \
490     }                                                     \
491     rp2[_i] = col;                                        \
492     ap2[_i] = value;                                      \
493     B->nonzerostate++;                                    \
494     b_noinsert: ;                                         \
495     bilen[row] = nrow2;                                   \
496   }
497 
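/*
   Replaces the stored values of one locally owned row, given by its global row number. The array v
   is assumed to hold the row's values ordered by global column: first the off-diagonal entries to
   the left of this process's diagonal block, then the diagonal-block entries, then the remaining
   off-diagonal entries. The nonzero pattern of the row is not changed.
*/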
498 #undef __FUNCT__
499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
501 {
502   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
503   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
504   PetscErrorCode ierr;
505   PetscInt       l,*garray = mat->garray,diag;
506 
507   PetscFunctionBegin;
508   /* code only works for square matrices A */
509 
510   /* find size of row to the left of the diagonal part */
511   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
512   row  = row - diag;
513   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
514     if (garray[b->j[b->i[row]+l]] > diag) break;
515   }
516   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* diagonal part */
519   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
520 
521   /* right of diagonal part */
522   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatSetValues_MPIAIJ"
528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
529 {
530   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
531   PetscScalar    value;
532   PetscErrorCode ierr;
533   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
534   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
535   PetscBool      roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat        A                 = aij->A;
539   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
540   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
541   MatScalar  *aa               = a->a;
542   PetscBool  ignorezeroentries = a->ignorezeroentries;
543   Mat        B                 = aij->B;
544   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
545   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
546   MatScalar  *ba               = b->a;
547 
548   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
549   PetscInt  nonew;
550   MatScalar *ap1,*ap2;
551 
552   PetscFunctionBegin;
553   for (i=0; i<m; i++) {
554     if (im[i] < 0) continue;
555 #if defined(PETSC_USE_DEBUG)
556     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
557 #endif
558     if (im[i] >= rstart && im[i] < rend) {
559       row      = im[i] - rstart;
560       lastcol1 = -1;
561       rp1      = aj + ai[row];
562       ap1      = aa + ai[row];
563       rmax1    = aimax[row];
564       nrow1    = ailen[row];
565       low1     = 0;
566       high1    = nrow1;
567       lastcol2 = -1;
568       rp2      = bj + bi[row];
569       ap2      = ba + bi[row];
570       rmax2    = bimax[row];
571       nrow2    = bilen[row];
572       low2     = 0;
573       high2    = nrow2;
574 
575       for (j=0; j<n; j++) {
576         if (roworiented) value = v[i*n+j];
577         else             value = v[i+j*m];
578         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
579         if (in[j] >= cstart && in[j] < cend) {
580           col   = in[j] - cstart;
581           nonew = a->nonew;
582           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
583         } else if (in[j] < 0) continue;
584 #if defined(PETSC_USE_DEBUG)
585         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
586 #endif
587         else {
588           if (mat->was_assembled) {
589             if (!aij->colmap) {
590               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
591             }
592 #if defined(PETSC_USE_CTABLE)
593             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
594             col--;
595 #else
596             col = aij->colmap[in[j]] - 1;
597 #endif
598             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
599               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
600               col  =  in[j];
601               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
602               B     = aij->B;
603               b     = (Mat_SeqAIJ*)B->data;
604               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
605               rp2   = bj + bi[row];
606               ap2   = ba + bi[row];
607               rmax2 = bimax[row];
608               nrow2 = bilen[row];
609               low2  = 0;
610               high2 = nrow2;
611               bm    = aij->B->rmap->n;
612               ba    = b->a;
613             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614           } else col = in[j];
615           nonew = b->nonew;
616           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
617         }
618       }
619     } else {
620       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
621       if (!aij->donotstash) {
622         mat->assembled = PETSC_FALSE;
623         if (roworiented) {
624           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
625         } else {
626           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
627         }
628       }
629     }
630   }
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatGetValues_MPIAIJ"
636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
641   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
642 
643   PetscFunctionBegin;
644   for (i=0; i<m; i++) {
645     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
646     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
647     if (idxm[i] >= rstart && idxm[i] < rend) {
648       row = idxm[i] - rstart;
649       for (j=0; j<n; j++) {
650         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
651         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
652         if (idxn[j] >= cstart && idxn[j] < cend) {
653           col  = idxn[j] - cstart;
654           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
655         } else {
656           if (!aij->colmap) {
657             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
658           }
659 #if defined(PETSC_USE_CTABLE)
660           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
661           col--;
662 #else
663           col = aij->colmap[idxn[j]] - 1;
664 #endif
665           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
666           else {
667             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
668           }
669         }
670       }
671     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
681 {
682   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
683   PetscErrorCode ierr;
684   PetscInt       nstash,reallocs;
685 
686   PetscFunctionBegin;
687   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
688 
689   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
690   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
691   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
694 
695 #undef __FUNCT__
696 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
697 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
698 {
699   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
700   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
701   PetscErrorCode ierr;
702   PetscMPIInt    n;
703   PetscInt       i,j,rstart,ncols,flg;
704   PetscInt       *row,*col;
705   PetscBool      other_disassembled;
706   PetscScalar    *val;
707 
708   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
709 
710   PetscFunctionBegin;
711   if (!aij->donotstash && !mat->nooffprocentries) {
712     while (1) {
713       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
714       if (!flg) break;
715 
716       for (i=0; i<n; ) {
717         /* Now identify the consecutive vals belonging to the same row */
718         for (j=i,rstart=row[j]; j<n; j++) {
719           if (row[j] != rstart) break;
720         }
721         if (j < n) ncols = j-i;
722         else       ncols = n-i;
723         /* Now assemble all these values with a single function call */
724         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
725 
726         i = j;
727       }
728     }
729     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
730   }
731   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
732   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
733 
734   /* determine if any processor has disassembled, if so we must
735      also disassemble ourselves, in order that we may reassemble. */
736   /*
737      if nonzero structure of submatrix B cannot change then we know that
738      no processor disassembled thus we can skip this stuff
739   */
740   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
741     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
742     if (mat->was_assembled && !other_disassembled) {
743       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
744     }
745   }
746   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
747     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
748   }
749   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
750   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
751   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
752 
753   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
754 
755   aij->rowvalues = 0;
756 
757   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
758   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
759 
760   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
761   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
762     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
763     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
764   }
765   PetscFunctionReturn(0);
766 }
767 
768 #undef __FUNCT__
769 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
771 {
772   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
773   PetscErrorCode ierr;
774 
775   PetscFunctionBegin;
776   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
777   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 
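/*
   Zeros the (possibly off-process) rows listed in rows[]: the global row numbers are translated to
   local rows (via a PetscSF reduction unless MAT_NO_OFF_PROC_ZERO_ROWS is set), the right-hand side
   b is fixed up from x when both are given, the off-diagonal block is zeroed first, and the diagonal
   block is zeroed with diag placed on the diagonal, inserting the diagonal entries explicitly when
   the diagonal block is not square.
*/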
781 #undef __FUNCT__
782 #define __FUNCT__ "MatZeroRows_MPIAIJ"
783 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
784 {
785   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
786   PetscInt      *owners = A->rmap->range;
787   PetscInt       n      = A->rmap->n;
788   PetscSF        sf;
789   PetscInt      *lrows;
790   PetscSFNode   *rrows;
791   PetscInt       r, p = 0, len = 0;
792   PetscErrorCode ierr;
793 
794   PetscFunctionBegin;
795   /* Create SF where leaves are input rows and roots are owned rows */
796   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
797   for (r = 0; r < n; ++r) lrows[r] = -1;
798   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
799   for (r = 0; r < N; ++r) {
800     const PetscInt idx   = rows[r];
801     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
802     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
803       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
804     }
805     if (A->nooffproczerorows) {
806       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
807       lrows[len++] = idx - owners[p];
808     } else {
809       rrows[r].rank = p;
810       rrows[r].index = rows[r] - owners[p];
811     }
812   }
813   if (!A->nooffproczerorows) {
814     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
815     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
816     /* Collect flags for rows to be zeroed */
817     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
818     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
819     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
820     /* Compress and put in row numbers */
821     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
822   }
823   /* fix right hand side if needed */
824   if (x && b) {
825     const PetscScalar *xx;
826     PetscScalar       *bb;
827 
828     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
829     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
830     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
831     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
832     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
833   }
834   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
835   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
836   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
837     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
838   } else if (diag != 0.0) {
839     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
840     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
841     for (r = 0; r < len; ++r) {
842       const PetscInt row = lrows[r] + A->rmap->rstart;
843       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
844     }
845     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
846     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
847   } else {
848     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
849   }
850   ierr = PetscFree(lrows);CHKERRQ(ierr);
851 
852   /* only change matrix nonzero state if pattern was allowed to be changed */
853   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
854     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
855     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
856   }
857   PetscFunctionReturn(0);
858 }
859 
860 #undef __FUNCT__
861 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
862 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
863 {
864   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
865   PetscErrorCode    ierr;
866   PetscMPIInt       n = A->rmap->n;
867   PetscInt          i,j,r,m,p = 0,len = 0;
868   PetscInt          *lrows,*owners = A->rmap->range;
869   PetscSFNode       *rrows;
870   PetscSF           sf;
871   const PetscScalar *xx;
872   PetscScalar       *bb,*mask;
873   Vec               xmask,lmask;
874   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
875   const PetscInt    *aj, *ii,*ridx;
876   PetscScalar       *aa;
877 
878   PetscFunctionBegin;
879   /* Create SF where leaves are input rows and roots are owned rows */
880   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
881   for (r = 0; r < n; ++r) lrows[r] = -1;
882   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
883   for (r = 0; r < N; ++r) {
884     const PetscInt idx   = rows[r];
885     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
886     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
887       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
888     }
889     rrows[r].rank  = p;
890     rrows[r].index = rows[r] - owners[p];
891   }
892   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
893   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
894   /* Collect flags for rows to be zeroed */
895   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
896   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
897   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
898   /* Compress and put in row numbers */
899   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
900   /* zero diagonal part of matrix */
901   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
902   /* handle off diagonal part of matrix */
903   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
904   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
905   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
906   for (i=0; i<len; i++) bb[lrows[i]] = 1;
907   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
908   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
909   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
910   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
911   if (x) {
912     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
913     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
914     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
915     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
916   }
917   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
918   /* remove zeroed rows of off diagonal matrix */
919   ii = aij->i;
920   for (i=0; i<len; i++) {
921     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
922   }
923   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
924   if (aij->compressedrow.use) {
925     m    = aij->compressedrow.nrows;
926     ii   = aij->compressedrow.i;
927     ridx = aij->compressedrow.rindex;
928     for (i=0; i<m; i++) {
929       n  = ii[i+1] - ii[i];
930       aj = aij->j + ii[i];
931       aa = aij->a + ii[i];
932 
933       for (j=0; j<n; j++) {
934         if (PetscAbsScalar(mask[*aj])) {
935           if (b) bb[*ridx] -= *aa*xx[*aj];
936           *aa = 0.0;
937         }
938         aa++;
939         aj++;
940       }
941       ridx++;
942     }
943   } else { /* do not use compressed row format */
944     m = l->B->rmap->n;
945     for (i=0; i<m; i++) {
946       n  = ii[i+1] - ii[i];
947       aj = aij->j + ii[i];
948       aa = aij->a + ii[i];
949       for (j=0; j<n; j++) {
950         if (PetscAbsScalar(mask[*aj])) {
951           if (b) bb[i] -= *aa*xx[*aj];
952           *aa = 0.0;
953         }
954         aa++;
955         aj++;
956       }
957     }
958   }
959   if (x) {
960     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
961     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
962   }
963   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
964   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
965   ierr = PetscFree(lrows);CHKERRQ(ierr);
966 
967   /* only change matrix nonzero state if pattern was allowed to be changed */
968   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
969     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
970     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
971   }
972   PetscFunctionReturn(0);
973 }
974 
975 #undef __FUNCT__
976 #define __FUNCT__ "MatMult_MPIAIJ"
977 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
978 {
979   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
980   PetscErrorCode ierr;
981   PetscInt       nt;
982 
983   PetscFunctionBegin;
984   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
985   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
986   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
987   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
988   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
989   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
990   PetscFunctionReturn(0);
991 }
992 
993 #undef __FUNCT__
994 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
995 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
996 {
997   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
998   PetscErrorCode ierr;
999 
1000   PetscFunctionBegin;
1001   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1002   PetscFunctionReturn(0);
1003 }
1004 
1005 #undef __FUNCT__
1006 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1007 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1008 {
1009   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1010   PetscErrorCode ierr;
1011 
1012   PetscFunctionBegin;
1013   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1014   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1015   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1016   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1017   PetscFunctionReturn(0);
1018 }
1019 
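/*
   Computes yy = A^T xx: the transpose of the off-diagonal block is applied to the local part of xx
   and the result is scattered back to the owning processes (reverse scatter with addition), while
   the transpose of the diagonal block is applied locally into yy; the relative order of the local
   product and the scatter depends on whether the scatter's begin and end are merged.
*/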
1020 #undef __FUNCT__
1021 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1022 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1023 {
1024   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1025   PetscErrorCode ierr;
1026   PetscBool      merged;
1027 
1028   PetscFunctionBegin;
1029   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1030   /* do nondiagonal part */
1031   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1032   if (!merged) {
1033     /* send it on its way */
1034     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1035     /* do local part */
1036     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1037     /* receive remote parts: note this assumes the values are not actually */
1038     /* added into yy until the next line */
1039     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1040   } else {
1041     /* do local part */
1042     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1043     /* send it on its way */
1044     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1045     /* values actually were received in the Begin() but we need to call this nop */
1046     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1047   }
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1053 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1054 {
1055   MPI_Comm       comm;
1056   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1057   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1058   IS             Me,Notme;
1059   PetscErrorCode ierr;
1060   PetscInt       M,N,first,last,*notme,i;
1061   PetscMPIInt    size;
1062 
1063   PetscFunctionBegin;
1064   /* Easy test: symmetric diagonal block */
1065   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1066   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1067   if (!*f) PetscFunctionReturn(0);
1068   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1069   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1070   if (size == 1) PetscFunctionReturn(0);
1071 
1072   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1073   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1074   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1075   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1076   for (i=0; i<first; i++) notme[i] = i;
1077   for (i=last; i<M; i++) notme[i-last+first] = i;
1078   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1079   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1080   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1081   Aoff = Aoffs[0];
1082   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1083   Boff = Boffs[0];
1084   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1085   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1086   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1087   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1088   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1089   ierr = PetscFree(notme);CHKERRQ(ierr);
1090   PetscFunctionReturn(0);
1091 }
1092 
1093 #undef __FUNCT__
1094 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1095 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1096 {
1097   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1098   PetscErrorCode ierr;
1099 
1100   PetscFunctionBegin;
1101   /* do nondiagonal part */
1102   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1103   /* send it on its way */
1104   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1105   /* do local part */
1106   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1107   /* receive remote parts */
1108   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 /*
1113   This only works correctly for square matrices where the subblock A->A is the
1114    diagonal block
1115 */
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1118 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1119 {
1120   PetscErrorCode ierr;
1121   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1122 
1123   PetscFunctionBegin;
1124   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1125   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1126   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatScale_MPIAIJ"
1132 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1139   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 #undef __FUNCT__
1144 #define __FUNCT__ "MatDestroy_MPIAIJ"
1145 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1146 {
1147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1148   PetscErrorCode ierr;
1149 
1150   PetscFunctionBegin;
1151 #if defined(PETSC_USE_LOG)
1152   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1153 #endif
1154   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1155   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1156   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1157   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1158 #if defined(PETSC_USE_CTABLE)
1159   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1160 #else
1161   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1162 #endif
1163   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1164   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1165   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1166   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1167   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1168   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1169 
1170   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1171   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1172   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1173   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1174   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1175   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1176   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1177   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1179 #if defined(PETSC_HAVE_ELEMENTAL)
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1181 #endif
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1187 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1188 {
1189   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1190   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1191   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1192   PetscErrorCode ierr;
1193   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1194   int            fd;
1195   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1196   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1197   PetscScalar    *column_values;
1198   PetscInt       message_count,flowcontrolcount;
1199   FILE           *file;
1200 
1201   PetscFunctionBegin;
1202   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1203   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1204   nz   = A->nz + B->nz;
1205   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1206   if (!rank) {
1207     header[0] = MAT_FILE_CLASSID;
1208     header[1] = mat->rmap->N;
1209     header[2] = mat->cmap->N;
1210 
1211     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1212     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1213     /* get largest number of rows any processor has */
1214     rlen  = mat->rmap->n;
1215     range = mat->rmap->range;
1216     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1217   } else {
1218     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1219     rlen = mat->rmap->n;
1220   }
1221 
1222   /* load up the local row counts */
1223   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1224   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1225 
1226   /* store the row lengths to the file */
1227   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1228   if (!rank) {
1229     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1230     for (i=1; i<size; i++) {
1231       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1232       rlen = range[i+1] - range[i];
1233       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1234       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1235     }
1236     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1237   } else {
1238     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1239     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1240     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1241   }
1242   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1243 
1244   /* load up the local column indices */
1245   nzmax = nz; /* this processor needs as much space as the largest processor needs */
1246   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1247   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1248   cnt   = 0;
1249   for (i=0; i<mat->rmap->n; i++) {
1250     for (j=B->i[i]; j<B->i[i+1]; j++) {
1251       if ((col = garray[B->j[j]]) > cstart) break;
1252       column_indices[cnt++] = col;
1253     }
1254     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1255     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1256   }
1257   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1258 
1259   /* store the column indices to the file */
1260   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1261   if (!rank) {
1262     MPI_Status status;
1263     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1264     for (i=1; i<size; i++) {
1265       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1266       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1267       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1268       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1269       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1270     }
1271     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1272   } else {
1273     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1274     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1275     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1276     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1277   }
1278   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1279 
1280   /* load up the local column values */
1281   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1282   cnt  = 0;
1283   for (i=0; i<mat->rmap->n; i++) {
1284     for (j=B->i[i]; j<B->i[i+1]; j++) {
1285       if (garray[B->j[j]] > cstart) break;
1286       column_values[cnt++] = B->a[j];
1287     }
1288     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1289     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1290   }
1291   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1292 
1293   /* store the column values to the file */
1294   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1295   if (!rank) {
1296     MPI_Status status;
1297     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1298     for (i=1; i<size; i++) {
1299       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1300       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1301       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1302       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1303       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1304     }
1305     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1306   } else {
1307     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1308     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1309     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1310     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1311   }
1312   ierr = PetscFree(column_values);CHKERRQ(ierr);
1313 
1314   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1315   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1316   PetscFunctionReturn(0);
1317 }
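
/*
   The routine above streams the parallel matrix to the viewer's binary file in four chunks:
   the four-entry header (MAT_FILE_CLASSID, global rows, global columns, total nonzeros),
   the per-row nonzero counts, the global column indices, and finally the numerical values,
   with rank 0 collecting the pieces from the other ranks under flow control.

   A minimal sketch of reading such a file back (the file name "matrix.dat" is only an
   illustrative placeholder):

      Mat         A;
      PetscViewer viewer;

      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatLoad(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/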
1318 
1319 #include <petscdraw.h>
1320 #undef __FUNCT__
1321 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1322 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1323 {
1324   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1325   PetscErrorCode    ierr;
1326   PetscMPIInt       rank = aij->rank,size = aij->size;
1327   PetscBool         isdraw,iascii,isbinary;
1328   PetscViewer       sviewer;
1329   PetscViewerFormat format;
1330 
1331   PetscFunctionBegin;
1332   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1333   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1334   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1335   if (iascii) {
1336     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1337     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1338       MatInfo   info;
1339       PetscBool inodes;
1340 
1341       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1342       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1343       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1344       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1345       if (!inodes) {
1346         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1347                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1348       } else {
1349         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1350                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1351       }
1352       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1353       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1354       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1355       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1356       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1357       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1358       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1359       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1360       PetscFunctionReturn(0);
1361     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1362       PetscInt inodecount,inodelimit,*inodes;
1363       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1364       if (inodes) {
1365         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1366       } else {
1367         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1368       }
1369       PetscFunctionReturn(0);
1370     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1371       PetscFunctionReturn(0);
1372     }
1373   } else if (isbinary) {
1374     if (size == 1) {
1375       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1376       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1377     } else {
1378       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1379     }
1380     PetscFunctionReturn(0);
1381   } else if (isdraw) {
1382     PetscDraw draw;
1383     PetscBool isnull;
1384     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1385     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1386   }
1387 
1388   {
1389     /* assemble the entire matrix onto the first processor */
1390     Mat        A;
1391     Mat_SeqAIJ *Aloc;
1392     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1393     MatScalar  *a;
1394 
1395     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1396     if (!rank) {
1397       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1398     } else {
1399       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1400     }
1401     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1402     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1403     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1404     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1405     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1406 
1407     /* copy over the A part */
1408     Aloc = (Mat_SeqAIJ*)aij->A->data;
1409     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1410     row  = mat->rmap->rstart;
1411     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1412     for (i=0; i<m; i++) {
1413       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1414       row++;
1415       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1416     }
1417     aj = Aloc->j;
1418     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1419 
1420     /* copy over the B part */
1421     Aloc = (Mat_SeqAIJ*)aij->B->data;
1422     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1423     row  = mat->rmap->rstart;
1424     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1425     ct   = cols;
1426     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1427     for (i=0; i<m; i++) {
1428       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1429       row++;
1430       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1431     }
1432     ierr = PetscFree(ct);CHKERRQ(ierr);
1433     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1434     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1435     /*
1436        Every process must participate in this call so the matrix can be drawn, since the
1437        graphics waits are synchronized across all processes that share the PetscDraw object
1438     */
1439     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1440     if (!rank) {
1441       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1442       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1443     }
1444     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1445     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1446     ierr = MatDestroy(&A);CHKERRQ(ierr);
1447   }
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 #undef __FUNCT__
1452 #define __FUNCT__ "MatView_MPIAIJ"
1453 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1454 {
1455   PetscErrorCode ierr;
1456   PetscBool      iascii,isdraw,issocket,isbinary;
1457 
1458   PetscFunctionBegin;
1459   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1460   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1461   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1462   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1463   if (iascii || isdraw || isbinary || issocket) {
1464     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1465   }
1466   PetscFunctionReturn(0);
1467 }
1468 
1469 #undef __FUNCT__
1470 #define __FUNCT__ "MatSOR_MPIAIJ"
1471 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1472 {
1473   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1474   PetscErrorCode ierr;
1475   Vec            bb1 = 0;
1476   PetscBool      hasop;
1477 
1478   PetscFunctionBegin;
1479   if (flag == SOR_APPLY_UPPER) {
1480     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1481     PetscFunctionReturn(0);
1482   }
1483 
1484   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1485     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1486   }
1487 
1488   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1489     if (flag & SOR_ZERO_INITIAL_GUESS) {
1490       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1491       its--;
1492     }
1493 
1494     while (its--) {
1495       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1496       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1497 
1498       /* update rhs: bb1 = bb - B*x */
1499       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1500       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1501 
1502       /* local sweep */
1503       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1504     }
1505   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1506     if (flag & SOR_ZERO_INITIAL_GUESS) {
1507       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1508       its--;
1509     }
1510     while (its--) {
1511       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1512       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1513 
1514       /* update rhs: bb1 = bb - B*x */
1515       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1516       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1517 
1518       /* local sweep */
1519       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1520     }
1521   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1522     if (flag & SOR_ZERO_INITIAL_GUESS) {
1523       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1524       its--;
1525     }
1526     while (its--) {
1527       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1528       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1529 
1530       /* update rhs: bb1 = bb - B*x */
1531       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1532       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1533 
1534       /* local sweep */
1535       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1536     }
1537   } else if (flag & SOR_EISENSTAT) {
1538     Vec xx1;
1539 
1540     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1541     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1542 
1543     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1544     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1545     if (!mat->diag) {
1546       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1547       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1548     }
1549     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1550     if (hasop) {
1551       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1552     } else {
1553       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1554     }
1555     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1556 
1557     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1558 
1559     /* local sweep */
1560     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1561     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1562     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1563   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1564 
1565   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1566 
1567   matin->errortype = mat->A->errortype;
1568   PetscFunctionReturn(0);
1569 }
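
/*
   A note on the sweeps above: writing the parallel matrix as A = A_d + B_o, where A_d is the
   on-process (diagonal) block mat->A and B_o the off-process block mat->B acting on the ghost
   values gathered into mat->lvec, each outer iteration refreshes the local right-hand side

      bb1 = bb - B_o * x_ghost

   and then applies sequential SOR to the local system with matrix A_d.  This is a restatement
   of the code above: only the local block is relaxed, and a true parallel SOR is not supported.
*/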
1570 
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatPermute_MPIAIJ"
1573 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1574 {
1575   Mat            aA,aB,Aperm;
1576   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1577   PetscScalar    *aa,*ba;
1578   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1579   PetscSF        rowsf,sf;
1580   IS             parcolp = NULL;
1581   PetscBool      done;
1582   PetscErrorCode ierr;
1583 
1584   PetscFunctionBegin;
1585   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1586   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1587   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1588   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1589 
1590   /* Invert row permutation to find out where my rows should go */
1591   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1592   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1593   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1594   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1595   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1596   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1597 
1598   /* Invert column permutation to find out where my columns should go */
1599   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1600   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1601   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1602   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1603   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1604   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1605   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1606 
1607   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1608   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1609   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1610 
1611   /* Find out where my gcols should go */
1612   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1613   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1614   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1615   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1616   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1617   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1618   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1619   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1620 
1621   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1622   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1623   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1624   for (i=0; i<m; i++) {
1625     PetscInt row = rdest[i],rowner;
1626     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1627     for (j=ai[i]; j<ai[i+1]; j++) {
1628       PetscInt cowner,col = cdest[aj[j]];
1629       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1630       if (rowner == cowner) dnnz[i]++;
1631       else onnz[i]++;
1632     }
1633     for (j=bi[i]; j<bi[i+1]; j++) {
1634       PetscInt cowner,col = gcdest[bj[j]];
1635       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1636       if (rowner == cowner) dnnz[i]++;
1637       else onnz[i]++;
1638     }
1639   }
1640   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1641   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1642   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1643   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1644   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1645 
1646   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1647   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1648   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1649   for (i=0; i<m; i++) {
1650     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1651     PetscInt j0,rowlen;
1652     rowlen = ai[i+1] - ai[i];
1653     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1654       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1655       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1656     }
1657     rowlen = bi[i+1] - bi[i];
1658     for (j0=j=0; j<rowlen; j0=j) {
1659       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1660       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1661     }
1662   }
1663   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1664   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1665   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1666   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1667   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1668   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1669   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1670   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1671   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1672   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1673   *B = Aperm;
1674   PetscFunctionReturn(0);
1675 }
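
/*
   A minimal calling sketch for the routine above, via the public MatPermute() interface.
   The index sets here are just the identity permutation and serve only as placeholders;
   A is assumed to be an assembled MATMPIAIJ matrix.

      Mat      B;
      IS       rowp,colp;
      PetscInt m,n,rstart,cstart;

      ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
      ierr = MatGetOwnershipRangeColumn(A,&cstart,NULL);CHKERRQ(ierr);
      ierr = ISCreateStride(PetscObjectComm((PetscObject)A),m,rstart,1,&rowp);CHKERRQ(ierr);
      ierr = ISCreateStride(PetscObjectComm((PetscObject)A),n,cstart,1,&colp);CHKERRQ(ierr);
      ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
      ierr = ISDestroy(&rowp);CHKERRQ(ierr);
      ierr = ISDestroy(&colp);CHKERRQ(ierr);
*/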
1676 
1677 #undef __FUNCT__
1678 #define __FUNCT__ "MatGetGhosts_MPIAIJ"
1679 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1680 {
1681   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1682   PetscErrorCode ierr;
1683 
1684   PetscFunctionBegin;
1685   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1686   if (ghosts) *ghosts = aij->garray;
1687   PetscFunctionReturn(0);
1688 }
1689 
1690 #undef __FUNCT__
1691 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1692 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1693 {
1694   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1695   Mat            A    = mat->A,B = mat->B;
1696   PetscErrorCode ierr;
1697   PetscReal      isend[5],irecv[5];
1698 
1699   PetscFunctionBegin;
1700   info->block_size = 1.0;
1701   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1702 
1703   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1704   isend[3] = info->memory;  isend[4] = info->mallocs;
1705 
1706   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1707 
1708   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1709   isend[3] += info->memory;  isend[4] += info->mallocs;
1710   if (flag == MAT_LOCAL) {
1711     info->nz_used      = isend[0];
1712     info->nz_allocated = isend[1];
1713     info->nz_unneeded  = isend[2];
1714     info->memory       = isend[3];
1715     info->mallocs      = isend[4];
1716   } else if (flag == MAT_GLOBAL_MAX) {
1717     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1718 
1719     info->nz_used      = irecv[0];
1720     info->nz_allocated = irecv[1];
1721     info->nz_unneeded  = irecv[2];
1722     info->memory       = irecv[3];
1723     info->mallocs      = irecv[4];
1724   } else if (flag == MAT_GLOBAL_SUM) {
1725     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1726 
1727     info->nz_used      = irecv[0];
1728     info->nz_allocated = irecv[1];
1729     info->nz_unneeded  = irecv[2];
1730     info->memory       = irecv[3];
1731     info->mallocs      = irecv[4];
1732   }
1733   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1734   info->fill_ratio_needed = 0;
1735   info->factor_mallocs    = 0;
1736   PetscFunctionReturn(0);
1737 }
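
/*
   A small usage sketch for the routine above, reached through the public MatGetInfo()
   interface; the flag selects the local values or the global max/sum reductions coded above
   (A is assumed to be an assembled matrix):

      MatInfo info;

      ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g, allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/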
1738 
1739 #undef __FUNCT__
1740 #define __FUNCT__ "MatSetOption_MPIAIJ"
1741 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1742 {
1743   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1744   PetscErrorCode ierr;
1745 
1746   PetscFunctionBegin;
1747   switch (op) {
1748   case MAT_NEW_NONZERO_LOCATIONS:
1749   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1750   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1751   case MAT_KEEP_NONZERO_PATTERN:
1752   case MAT_NEW_NONZERO_LOCATION_ERR:
1753   case MAT_USE_INODES:
1754   case MAT_IGNORE_ZERO_ENTRIES:
1755     MatCheckPreallocated(A,1);
1756     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1757     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1758     break;
1759   case MAT_ROW_ORIENTED:
1760     MatCheckPreallocated(A,1);
1761     a->roworiented = flg;
1762 
1763     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1764     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1765     break;
1766   case MAT_NEW_DIAGONALS:
1767     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1768     break;
1769   case MAT_IGNORE_OFF_PROC_ENTRIES:
1770     a->donotstash = flg;
1771     break;
1772   case MAT_SPD:
1773     A->spd_set = PETSC_TRUE;
1774     A->spd     = flg;
1775     if (flg) {
1776       A->symmetric                  = PETSC_TRUE;
1777       A->structurally_symmetric     = PETSC_TRUE;
1778       A->symmetric_set              = PETSC_TRUE;
1779       A->structurally_symmetric_set = PETSC_TRUE;
1780     }
1781     break;
1782   case MAT_SYMMETRIC:
1783     MatCheckPreallocated(A,1);
1784     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1785     break;
1786   case MAT_STRUCTURALLY_SYMMETRIC:
1787     MatCheckPreallocated(A,1);
1788     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1789     break;
1790   case MAT_HERMITIAN:
1791     MatCheckPreallocated(A,1);
1792     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1793     break;
1794   case MAT_SYMMETRY_ETERNAL:
1795     MatCheckPreallocated(A,1);
1796     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1797     break;
1798   default:
1799     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1800   }
1801   PetscFunctionReturn(0);
1802 }
1803 
1804 #undef __FUNCT__
1805 #define __FUNCT__ "MatGetRow_MPIAIJ"
1806 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1807 {
1808   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1809   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1810   PetscErrorCode ierr;
1811   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1812   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1813   PetscInt       *cmap,*idx_p;
1814 
1815   PetscFunctionBegin;
1816   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1817   mat->getrowactive = PETSC_TRUE;
1818 
1819   if (!mat->rowvalues && (idx || v)) {
1820     /*
1821         allocate enough space to hold information from the longest row.
1822     */
1823     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1824     PetscInt   max = 1,tmp;
1825     for (i=0; i<matin->rmap->n; i++) {
1826       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1827       if (max < tmp) max = tmp;
1828     }
1829     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1830   }
1831 
1832   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1833   lrow = row - rstart;
1834 
1835   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1836   if (!v)   {pvA = 0; pvB = 0;}
1837   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1838   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1839   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1840   nztot = nzA + nzB;
1841 
1842   cmap = mat->garray;
1843   if (v  || idx) {
1844     if (nztot) {
1845       /* Sort by increasing column numbers, assuming A and B already sorted */
1846       PetscInt imark = -1;
1847       if (v) {
1848         *v = v_p = mat->rowvalues;
1849         for (i=0; i<nzB; i++) {
1850           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1851           else break;
1852         }
1853         imark = i;
1854         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1855         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1856       }
1857       if (idx) {
1858         *idx = idx_p = mat->rowindices;
1859         if (imark > -1) {
1860           for (i=0; i<imark; i++) {
1861             idx_p[i] = cmap[cworkB[i]];
1862           }
1863         } else {
1864           for (i=0; i<nzB; i++) {
1865             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1866             else break;
1867           }
1868           imark = i;
1869         }
1870         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1871         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1872       }
1873     } else {
1874       if (idx) *idx = 0;
1875       if (v)   *v   = 0;
1876     }
1877   }
1878   *nz  = nztot;
1879   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1880   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1881   PetscFunctionReturn(0);
1882 }
1883 
1884 #undef __FUNCT__
1885 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1886 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1887 {
1888   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1889 
1890   PetscFunctionBegin;
1891   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1892   aij->getrowactive = PETSC_FALSE;
1893   PetscFunctionReturn(0);
1894 }
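
/*
   The two routines above implement the MatGetRow()/MatRestoreRow() pair for this format:
   a row is assembled from the diagonal block (A) and the off-diagonal block (B) and merged
   into increasing global column order.  Only locally owned rows may be requested.  A typical
   traversal sketch (A is assumed to be an assembled matrix):

      PetscInt          row,rstart,rend,ncols;
      const PetscInt    *cols;
      const PetscScalar *vals;

      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
      }
*/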
1895 
1896 #undef __FUNCT__
1897 #define __FUNCT__ "MatNorm_MPIAIJ"
1898 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1899 {
1900   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1901   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1902   PetscErrorCode ierr;
1903   PetscInt       i,j,cstart = mat->cmap->rstart;
1904   PetscReal      sum = 0.0;
1905   MatScalar      *v;
1906 
1907   PetscFunctionBegin;
1908   if (aij->size == 1) {
1909     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1910   } else {
1911     if (type == NORM_FROBENIUS) {
1912       v = amat->a;
1913       for (i=0; i<amat->nz; i++) {
1914         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1915       }
1916       v = bmat->a;
1917       for (i=0; i<bmat->nz; i++) {
1918         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1919       }
1920       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1921       *norm = PetscSqrtReal(*norm);
1922     } else if (type == NORM_1) { /* max column norm */
1923       PetscReal *tmp,*tmp2;
1924       PetscInt  *jj,*garray = aij->garray;
1925       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1926       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1927       *norm = 0.0;
1928       v     = amat->a; jj = amat->j;
1929       for (j=0; j<amat->nz; j++) {
1930         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1931       }
1932       v = bmat->a; jj = bmat->j;
1933       for (j=0; j<bmat->nz; j++) {
1934         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1935       }
1936       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1937       for (j=0; j<mat->cmap->N; j++) {
1938         if (tmp2[j] > *norm) *norm = tmp2[j];
1939       }
1940       ierr = PetscFree(tmp);CHKERRQ(ierr);
1941       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1942     } else if (type == NORM_INFINITY) { /* max row norm */
1943       PetscReal ntemp = 0.0;
1944       for (j=0; j<aij->A->rmap->n; j++) {
1945         v   = amat->a + amat->i[j];
1946         sum = 0.0;
1947         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1948           sum += PetscAbsScalar(*v); v++;
1949         }
1950         v = bmat->a + bmat->i[j];
1951         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1952           sum += PetscAbsScalar(*v); v++;
1953         }
1954         if (sum > ntemp) ntemp = sum;
1955       }
1956       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1957     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1958   }
1959   PetscFunctionReturn(0);
1960 }
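
/*
   For reference, the three norms handled above are, for the global matrix,

      NORM_FROBENIUS:  ||A||_F   = sqrt( sum_ij |a_ij|^2 )
      NORM_1:          ||A||_1   = max_j sum_i |a_ij|     (largest column sum)
      NORM_INFINITY:   ||A||_inf = max_i sum_j |a_ij|     (largest row sum)

   each accumulated from the local diagonal and off-diagonal blocks and combined with a
   single MPI reduction; the 2-norm is not supported for this format.
*/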
1961 
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatTranspose_MPIAIJ"
1964 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1965 {
1966   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1967   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1968   PetscErrorCode ierr;
1969   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1970   PetscInt       cstart = A->cmap->rstart,ncol;
1971   Mat            B;
1972   MatScalar      *array;
1973 
1974   PetscFunctionBegin;
1975   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1976 
1977   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1978   ai = Aloc->i; aj = Aloc->j;
1979   bi = Bloc->i; bj = Bloc->j;
1980   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1981     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1982     PetscSFNode          *oloc;
1983     PETSC_UNUSED PetscSF sf;
1984 
1985     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1986     /* compute d_nnz for preallocation */
1987     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1988     for (i=0; i<ai[ma]; i++) {
1989       d_nnz[aj[i]]++;
1990       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1991     }
1992     /* compute local off-diagonal contributions */
1993     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1994     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1995     /* map those to global */
1996     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1997     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1998     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1999     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2000     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2001     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2002     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2003 
2004     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2005     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2006     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2007     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2008     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2009     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2010   } else {
2011     B    = *matout;
2012     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2013     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2014   }
2015 
2016   /* copy over the A part */
2017   array = Aloc->a;
2018   row   = A->rmap->rstart;
2019   for (i=0; i<ma; i++) {
2020     ncol = ai[i+1]-ai[i];
2021     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2022     row++;
2023     array += ncol; aj += ncol;
2024   }
2025   aj = Aloc->j;
2026   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
2027 
2028   /* copy over the B part */
2029   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2030   array = Bloc->a;
2031   row   = A->rmap->rstart;
2032   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2033   cols_tmp = cols;
2034   for (i=0; i<mb; i++) {
2035     ncol = bi[i+1]-bi[i];
2036     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2037     row++;
2038     array += ncol; cols_tmp += ncol;
2039   }
2040   ierr = PetscFree(cols);CHKERRQ(ierr);
2041 
2042   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2043   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2044   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2045     *matout = B;
2046   } else {
2047     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2048   }
2049   PetscFunctionReturn(0);
2050 }
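
/*
   A minimal calling sketch for the routine above, via the public MatTranspose() interface
   (A is assumed to be an assembled MATMPIAIJ matrix):

      Mat At;

      ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
      ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);
      ierr = MatDestroy(&At);CHKERRQ(ierr);

   where the MAT_REUSE_MATRIX call assumes the nonzero structure of A has not changed since
   the MAT_INITIAL_MATRIX call.
*/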
2051 
2052 #undef __FUNCT__
2053 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2054 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2055 {
2056   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2057   Mat            a    = aij->A,b = aij->B;
2058   PetscErrorCode ierr;
2059   PetscInt       s1,s2,s3;
2060 
2061   PetscFunctionBegin;
2062   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2063   if (rr) {
2064     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2065     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2066     /* Overlap communication with computation. */
2067     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2068   }
2069   if (ll) {
2070     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2071     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2072     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2073   }
2074   /* scale the diagonal block */
2075   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2076 
2077   if (rr) {
2078     /* Do a scatter end and then right scale the off-diagonal block */
2079     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2080     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2081   }
2082   PetscFunctionReturn(0);
2083 }
2084 
2085 #undef __FUNCT__
2086 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2087 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2088 {
2089   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2090   PetscErrorCode ierr;
2091 
2092   PetscFunctionBegin;
2093   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2094   PetscFunctionReturn(0);
2095 }
2096 
2097 #undef __FUNCT__
2098 #define __FUNCT__ "MatEqual_MPIAIJ"
2099 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2100 {
2101   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2102   Mat            a,b,c,d;
2103   PetscBool      flg;
2104   PetscErrorCode ierr;
2105 
2106   PetscFunctionBegin;
2107   a = matA->A; b = matA->B;
2108   c = matB->A; d = matB->B;
2109 
2110   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2111   if (flg) {
2112     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2113   }
2114   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2115   PetscFunctionReturn(0);
2116 }
2117 
2118 #undef __FUNCT__
2119 #define __FUNCT__ "MatCopy_MPIAIJ"
2120 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2121 {
2122   PetscErrorCode ierr;
2123   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2124   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2125 
2126   PetscFunctionBegin;
2127   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2128   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2129     /* Because of the column compression in the off-process part of the matrix a->B,
2130        the number of columns in a->B and b->B may differ, hence we cannot call
2131        MatCopy() directly on the two parts. If need be, a more efficient copy than
2132        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2133        then copying the submatrices */
2134     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2135   } else {
2136     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2137     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2138   }
2139   PetscFunctionReturn(0);
2140 }
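
/*
   A hedged usage sketch for the fast path above: B must already exist with (at least) the
   nonzero pattern of A, for example

      Mat B;

      ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
      ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

   Passing DIFFERENT_NONZERO_PATTERN, or copying between matrices with different copy
   implementations, falls back to MatCopy_Basic() as coded above.
*/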
2141 
2142 #undef __FUNCT__
2143 #define __FUNCT__ "MatSetUp_MPIAIJ"
2144 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2145 {
2146   PetscErrorCode ierr;
2147 
2148   PetscFunctionBegin;
2149   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 /*
2154    Computes the number of nonzeros per row needed for preallocation when X and Y
2155    have different nonzero structure.
2156 */
2157 #undef __FUNCT__
2158 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2159 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2160 {
2161   PetscInt       i,j,k,nzx,nzy;
2162 
2163   PetscFunctionBegin;
2164   /* Set the number of nonzeros in the new matrix */
2165   for (i=0; i<m; i++) {
2166     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2167     nzx = xi[i+1] - xi[i];
2168     nzy = yi[i+1] - yi[i];
2169     nnz[i] = 0;
2170     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2171       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2172       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2173       nnz[i]++;
2174     }
2175     for (; k<nzy; k++) nnz[i]++;
2176   }
2177   PetscFunctionReturn(0);
2178 }
2179 
2180 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2181 #undef __FUNCT__
2182 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2183 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2184 {
2185   PetscErrorCode ierr;
2186   PetscInt       m = Y->rmap->N;
2187   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2188   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2189 
2190   PetscFunctionBegin;
2191   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2192   PetscFunctionReturn(0);
2193 }
2194 
2195 #undef __FUNCT__
2196 #define __FUNCT__ "MatAXPY_MPIAIJ"
2197 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2198 {
2199   PetscErrorCode ierr;
2200   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2201   PetscBLASInt   bnz,one=1;
2202   Mat_SeqAIJ     *x,*y;
2203 
2204   PetscFunctionBegin;
2205   if (str == SAME_NONZERO_PATTERN) {
2206     PetscScalar alpha = a;
2207     x    = (Mat_SeqAIJ*)xx->A->data;
2208     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2209     y    = (Mat_SeqAIJ*)yy->A->data;
2210     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2211     x    = (Mat_SeqAIJ*)xx->B->data;
2212     y    = (Mat_SeqAIJ*)yy->B->data;
2213     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2214     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2215     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2216   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzero pattern of X is a subset of Y's */
2217     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2218   } else {
2219     Mat      B;
2220     PetscInt *nnz_d,*nnz_o;
2221     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2222     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2223     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2224     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2225     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2226     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2227     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2228     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2229     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2230     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2231     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2232     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2233     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2234     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2235   }
2236   PetscFunctionReturn(0);
2237 }
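
/*
   The three branches above correspond to the MatStructure argument of the public MatAXPY()
   interface, which computes Y <- Y + a*X, e.g.

      ierr = MatAXPY(Y,a,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

   SAME_NONZERO_PATTERN uses a direct BLAS axpy on the stored values of both blocks;
   SUBSET_NONZERO_PATTERN falls back to MatAXPY_Basic(); DIFFERENT_NONZERO_PATTERN
   preallocates a new matrix with the merged pattern and replaces Y's data via
   MatHeaderReplace().
*/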
2238 
2239 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2240 
2241 #undef __FUNCT__
2242 #define __FUNCT__ "MatConjugate_MPIAIJ"
2243 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2244 {
2245 #if defined(PETSC_USE_COMPLEX)
2246   PetscErrorCode ierr;
2247   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2248 
2249   PetscFunctionBegin;
2250   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2251   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2252 #else
2253   PetscFunctionBegin;
2254 #endif
2255   PetscFunctionReturn(0);
2256 }
2257 
2258 #undef __FUNCT__
2259 #define __FUNCT__ "MatRealPart_MPIAIJ"
2260 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2261 {
2262   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2263   PetscErrorCode ierr;
2264 
2265   PetscFunctionBegin;
2266   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2267   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2268   PetscFunctionReturn(0);
2269 }
2270 
2271 #undef __FUNCT__
2272 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2273 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2274 {
2275   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2276   PetscErrorCode ierr;
2277 
2278   PetscFunctionBegin;
2279   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2280   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2281   PetscFunctionReturn(0);
2282 }
2283 
2284 #undef __FUNCT__
2285 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2286 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2287 {
2288   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2289   PetscErrorCode ierr;
2290   PetscInt       i,*idxb = 0;
2291   PetscScalar    *va,*vb;
2292   Vec            vtmp;
2293 
2294   PetscFunctionBegin;
2295   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2296   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2297   if (idx) {
2298     for (i=0; i<A->rmap->n; i++) {
2299       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2300     }
2301   }
2302 
2303   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2304   if (idx) {
2305     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2306   }
2307   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2308   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2309 
2310   for (i=0; i<A->rmap->n; i++) {
2311     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2312       va[i] = vb[i];
2313       if (idx) idx[i] = a->garray[idxb[i]];
2314     }
2315   }
2316 
2317   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2318   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2319   ierr = PetscFree(idxb);CHKERRQ(ierr);
2320   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2321   PetscFunctionReturn(0);
2322 }
2323 
2324 #undef __FUNCT__
2325 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2326 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2327 {
2328   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2329   PetscErrorCode ierr;
2330   PetscInt       i,*idxb = 0;
2331   PetscScalar    *va,*vb;
2332   Vec            vtmp;
2333 
2334   PetscFunctionBegin;
2335   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2336   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2337   if (idx) {
2338     for (i=0; i<A->rmap->n; i++) {
2339       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2340     }
2341   }
2342 
2343   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2344   if (idx) {
2345     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2346   }
2347   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2348   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2349 
2350   for (i=0; i<A->rmap->n; i++) {
2351     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2352       va[i] = vb[i];
2353       if (idx) idx[i] = a->garray[idxb[i]];
2354     }
2355   }
2356 
2357   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2358   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2359   ierr = PetscFree(idxb);CHKERRQ(ierr);
2360   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2361   PetscFunctionReturn(0);
2362 }
2363 
2364 #undef __FUNCT__
2365 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2366 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2367 {
2368   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2369   PetscInt       n      = A->rmap->n;
2370   PetscInt       cstart = A->cmap->rstart;
2371   PetscInt       *cmap  = mat->garray;
2372   PetscInt       *diagIdx, *offdiagIdx;
2373   Vec            diagV, offdiagV;
2374   PetscScalar    *a, *diagA, *offdiagA;
2375   PetscInt       r;
2376   PetscErrorCode ierr;
2377 
2378   PetscFunctionBegin;
2379   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2380   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2381   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2382   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2383   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2384   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2385   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2386   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2387   for (r = 0; r < n; ++r) {
2388     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2389       a[r]   = diagA[r];
2390       idx[r] = cstart + diagIdx[r];
2391     } else {
2392       a[r]   = offdiagA[r];
2393       idx[r] = cmap[offdiagIdx[r]];
2394     }
2395   }
2396   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2397   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2398   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2399   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2400   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2401   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2402   PetscFunctionReturn(0);
2403 }
2404 
2405 #undef __FUNCT__
2406 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2407 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2408 {
2409   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2410   PetscInt       n      = A->rmap->n;
2411   PetscInt       cstart = A->cmap->rstart;
2412   PetscInt       *cmap  = mat->garray;
2413   PetscInt       *diagIdx, *offdiagIdx;
2414   Vec            diagV, offdiagV;
2415   PetscScalar    *a, *diagA, *offdiagA;
2416   PetscInt       r;
2417   PetscErrorCode ierr;
2418 
2419   PetscFunctionBegin;
2420   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2421   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2422   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2423   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2424   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2425   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2426   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2427   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2428   for (r = 0; r < n; ++r) {
2429     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2430       a[r]   = diagA[r];
2431       idx[r] = cstart + diagIdx[r];
2432     } else {
2433       a[r]   = offdiagA[r];
2434       idx[r] = cmap[offdiagIdx[r]];
2435     }
2436   }
2437   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2438   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2439   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2440   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2441   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2442   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2443   PetscFunctionReturn(0);
2444 }
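
/*
   A usage sketch covering the four row-extrema routines above, through the public interface
   (A is assumed to be an assembled matrix); MatCreateVecs() is used to obtain a vector with
   the matrix row layout:

      Vec      rmax;
      PetscInt m,*loc;

      ierr = MatCreateVecs(A,NULL,&rmax);CHKERRQ(ierr);
      ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
      ierr = PetscMalloc1(m,&loc);CHKERRQ(ierr);
      ierr = MatGetRowMaxAbs(A,rmax,loc);CHKERRQ(ierr);
      ierr = PetscFree(loc);CHKERRQ(ierr);
      ierr = VecDestroy(&rmax);CHKERRQ(ierr);
*/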
2445 
2446 #undef __FUNCT__
2447 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2448 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2449 {
2450   PetscErrorCode ierr;
2451   Mat            *dummy;
2452 
2453   PetscFunctionBegin;
2454   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2455   *newmat = *dummy;
2456   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2457   PetscFunctionReturn(0);
2458 }
2459 
2460 #undef __FUNCT__
2461 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2462 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2463 {
2464   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2465   PetscErrorCode ierr;
2466 
2467   PetscFunctionBegin;
2468   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2469   A->errortype = a->A->errortype;
2470   PetscFunctionReturn(0);
2471 }
2472 
2473 #undef __FUNCT__
2474 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2475 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2476 {
2477   PetscErrorCode ierr;
2478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2479 
2480   PetscFunctionBegin;
2481   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2482   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2483   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2484   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2485   PetscFunctionReturn(0);
2486 }
2487 
2488 #undef __FUNCT__
2489 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ"
2490 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2491 {
2492   PetscFunctionBegin;
2493   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2494   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2495   PetscFunctionReturn(0);
2496 }
2497 
2498 #undef __FUNCT__
2499 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap"
2500 /*@
2501    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2502 
2503    Collective on Mat
2504 
2505    Input Parameters:
2506 +    A - the matrix
2507 -    sc - PETSC_TRUE indicates use of the scalable algorithm (the default is to not use it)
2508 
2509    Level: advanced
2510 
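   Example Usage (a minimal sketch; A is assumed to be an assembled MATMPIAIJ matrix):
.vb
      ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
.ve

   The same choice can be made at runtime with the options database key
   -mat_increase_overlap_scalable handled in MatSetFromOptions_MPIAIJ() below.
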
2511 @*/
2512 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2513 {
2514   PetscErrorCode       ierr;
2515 
2516   PetscFunctionBegin;
2517   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2518   PetscFunctionReturn(0);
2519 }
2520 
2521 #undef __FUNCT__
2522 #define __FUNCT__ "MatSetFromOptions_MPIAIJ"
2523 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2524 {
2525   PetscErrorCode       ierr;
2526   PetscBool            sc = PETSC_FALSE,flg;
2527 
2528   PetscFunctionBegin;
2529   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2530   ierr = PetscObjectOptionsBegin((PetscObject)A);
2531     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2532     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2533     if (flg) {
2534       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2535     }
2536   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2537   PetscFunctionReturn(0);
2538 }
2539 
2540 #undef __FUNCT__
2541 #define __FUNCT__ "MatShift_MPIAIJ"
2542 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2543 {
2544   PetscErrorCode ierr;
2545   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2546   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2547 
2548   PetscFunctionBegin;
2549   if (!Y->preallocated) {
2550     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2551   } else if (!aij->nz) {
2552     PetscInt nonew = aij->nonew;
2553     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2554     aij->nonew = nonew;
2555   }
2556   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2557   PetscFunctionReturn(0);
2558 }
2559 
2560 #undef __FUNCT__
2561 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ"
2562 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2563 {
2564   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2565   PetscErrorCode ierr;
2566 
2567   PetscFunctionBegin;
2568   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2569   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2570   if (d) {
2571     PetscInt rstart;
2572     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2573     *d += rstart;
2574 
2575   }
2576   PetscFunctionReturn(0);
2577 }
2578 
2579 
2580 /* -------------------------------------------------------------------*/
2581 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2582                                        MatGetRow_MPIAIJ,
2583                                        MatRestoreRow_MPIAIJ,
2584                                        MatMult_MPIAIJ,
2585                                 /* 4*/ MatMultAdd_MPIAIJ,
2586                                        MatMultTranspose_MPIAIJ,
2587                                        MatMultTransposeAdd_MPIAIJ,
2588                                        0,
2589                                        0,
2590                                        0,
2591                                 /*10*/ 0,
2592                                        0,
2593                                        0,
2594                                        MatSOR_MPIAIJ,
2595                                        MatTranspose_MPIAIJ,
2596                                 /*15*/ MatGetInfo_MPIAIJ,
2597                                        MatEqual_MPIAIJ,
2598                                        MatGetDiagonal_MPIAIJ,
2599                                        MatDiagonalScale_MPIAIJ,
2600                                        MatNorm_MPIAIJ,
2601                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2602                                        MatAssemblyEnd_MPIAIJ,
2603                                        MatSetOption_MPIAIJ,
2604                                        MatZeroEntries_MPIAIJ,
2605                                 /*24*/ MatZeroRows_MPIAIJ,
2606                                        0,
2607                                        0,
2608                                        0,
2609                                        0,
2610                                 /*29*/ MatSetUp_MPIAIJ,
2611                                        0,
2612                                        0,
2613                                        0,
2614                                        0,
2615                                 /*34*/ MatDuplicate_MPIAIJ,
2616                                        0,
2617                                        0,
2618                                        0,
2619                                        0,
2620                                 /*39*/ MatAXPY_MPIAIJ,
2621                                        MatGetSubMatrices_MPIAIJ,
2622                                        MatIncreaseOverlap_MPIAIJ,
2623                                        MatGetValues_MPIAIJ,
2624                                        MatCopy_MPIAIJ,
2625                                 /*44*/ MatGetRowMax_MPIAIJ,
2626                                        MatScale_MPIAIJ,
2627                                        MatShift_MPIAIJ,
2628                                        MatDiagonalSet_MPIAIJ,
2629                                        MatZeroRowsColumns_MPIAIJ,
2630                                 /*49*/ MatSetRandom_MPIAIJ,
2631                                        0,
2632                                        0,
2633                                        0,
2634                                        0,
2635                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2636                                        0,
2637                                        MatSetUnfactored_MPIAIJ,
2638                                        MatPermute_MPIAIJ,
2639                                        0,
2640                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2641                                        MatDestroy_MPIAIJ,
2642                                        MatView_MPIAIJ,
2643                                        0,
2644                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2645                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2646                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2647                                        0,
2648                                        0,
2649                                        0,
2650                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2651                                        MatGetRowMinAbs_MPIAIJ,
2652                                        0,
2653                                        MatSetColoring_MPIAIJ,
2654                                        0,
2655                                        MatSetValuesAdifor_MPIAIJ,
2656                                 /*75*/ MatFDColoringApply_AIJ,
2657                                        MatSetFromOptions_MPIAIJ,
2658                                        0,
2659                                        0,
2660                                        MatFindZeroDiagonals_MPIAIJ,
2661                                 /*80*/ 0,
2662                                        0,
2663                                        0,
2664                                 /*83*/ MatLoad_MPIAIJ,
2665                                        0,
2666                                        0,
2667                                        0,
2668                                        0,
2669                                        0,
2670                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2671                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2672                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2673                                        MatPtAP_MPIAIJ_MPIAIJ,
2674                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2675                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2676                                        0,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                 /*99*/ 0,
2681                                        0,
2682                                        0,
2683                                        MatConjugate_MPIAIJ,
2684                                        0,
2685                                 /*104*/MatSetValuesRow_MPIAIJ,
2686                                        MatRealPart_MPIAIJ,
2687                                        MatImaginaryPart_MPIAIJ,
2688                                        0,
2689                                        0,
2690                                 /*109*/0,
2691                                        0,
2692                                        MatGetRowMin_MPIAIJ,
2693                                        0,
2694                                        MatMissingDiagonal_MPIAIJ,
2695                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2696                                        0,
2697                                        MatGetGhosts_MPIAIJ,
2698                                        0,
2699                                        0,
2700                                 /*119*/0,
2701                                        0,
2702                                        0,
2703                                        0,
2704                                        MatGetMultiProcBlock_MPIAIJ,
2705                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2706                                        MatGetColumnNorms_MPIAIJ,
2707                                        MatInvertBlockDiagonal_MPIAIJ,
2708                                        0,
2709                                        MatGetSubMatricesMPI_MPIAIJ,
2710                                 /*129*/0,
2711                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2712                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2713                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2714                                        0,
2715                                 /*134*/0,
2716                                        0,
2717                                        0,
2718                                        0,
2719                                        0,
2720                                 /*139*/0,
2721                                        0,
2722                                        0,
2723                                        MatFDColoringSetUp_MPIXAIJ,
2724                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2725                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2726 };
2727 
2728 /* ----------------------------------------------------------------------------------------*/
2729 
2730 #undef __FUNCT__
2731 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2732 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2733 {
2734   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2735   PetscErrorCode ierr;
2736 
2737   PetscFunctionBegin;
2738   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2739   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2740   PetscFunctionReturn(0);
2741 }
2742 
2743 #undef __FUNCT__
2744 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2745 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2746 {
2747   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2748   PetscErrorCode ierr;
2749 
2750   PetscFunctionBegin;
2751   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2752   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2753   PetscFunctionReturn(0);
2754 }
2755 
2756 #undef __FUNCT__
2757 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2758 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2759 {
2760   Mat_MPIAIJ     *b;
2761   PetscErrorCode ierr;
2762 
2763   PetscFunctionBegin;
2764   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2765   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2766   b = (Mat_MPIAIJ*)B->data;
2767 
2768   if (!B->preallocated) {
2769     /* Explicitly create 2 MATSEQAIJ matrices. */
2770     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2771     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2772     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2773     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2774     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2775     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2776     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2777     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2778     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2779     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2780   }
2781 
2782   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2783   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2784   B->preallocated = PETSC_TRUE;
2785   PetscFunctionReturn(0);
2786 }
2787 
2788 #undef __FUNCT__
2789 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2790 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2791 {
2792   Mat            mat;
2793   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2794   PetscErrorCode ierr;
2795 
2796   PetscFunctionBegin;
2797   *newmat = 0;
2798   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2799   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2800   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2801   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2802   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2803   a       = (Mat_MPIAIJ*)mat->data;
2804 
2805   mat->factortype   = matin->factortype;
2806   mat->assembled    = PETSC_TRUE;
2807   mat->insertmode   = NOT_SET_VALUES;
2808   mat->preallocated = PETSC_TRUE;
2809 
2810   a->size         = oldmat->size;
2811   a->rank         = oldmat->rank;
2812   a->donotstash   = oldmat->donotstash;
2813   a->roworiented  = oldmat->roworiented;
2814   a->rowindices   = 0;
2815   a->rowvalues    = 0;
2816   a->getrowactive = PETSC_FALSE;
2817 
2818   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2819   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2820 
2821   if (oldmat->colmap) {
2822 #if defined(PETSC_USE_CTABLE)
2823     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2824 #else
2825     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2826     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2827     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2828 #endif
2829   } else a->colmap = 0;
2830   if (oldmat->garray) {
2831     PetscInt len;
2832     len  = oldmat->B->cmap->n;
2833     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2834     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2835     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2836   } else a->garray = 0;
2837 
2838   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2839   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2840   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2841   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2842   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2843   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2844   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2845   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2846   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2847   *newmat = mat;
2848   PetscFunctionReturn(0);
2849 }
2850 
2851 
2852 
2853 #undef __FUNCT__
2854 #define __FUNCT__ "MatLoad_MPIAIJ"
2855 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2856 {
2857   PetscScalar    *vals,*svals;
2858   MPI_Comm       comm;
2859   PetscErrorCode ierr;
2860   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2861   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2862   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2863   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2864   PetscInt       cend,cstart,n,*rowners;
2865   int            fd;
2866   PetscInt       bs = newMat->rmap->bs;
2867 
2868   PetscFunctionBegin;
2869   /* force binary viewer to load .info file if it has not yet done so */
2870   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2871   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2872   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2873   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2874   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2875   if (!rank) {
2876     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2877     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2878   }
2879 
2880   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
2881   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2882   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2883   if (bs < 0) bs = 1;
2884 
2885   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2886   M    = header[1]; N = header[2];
2887 
2888   /* If global sizes are set, check if they are consistent with that given in the file */
2889   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2890   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2891 
2892   /* determine ownership of all (block) rows */
2893   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
2894   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2895   else m = newMat->rmap->n; /* Set by user */
2896 
2897   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2898   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2899 
2900   /* First process needs enough room for process with most rows */
2901   if (!rank) {
2902     mmax = rowners[1];
2903     for (i=2; i<=size; i++) {
2904       mmax = PetscMax(mmax, rowners[i]);
2905     }
2906   } else mmax = -1;             /* unused, but compilers complain */
2907 
2908   rowners[0] = 0;
2909   for (i=2; i<=size; i++) {
2910     rowners[i] += rowners[i-1];
2911   }
2912   rstart = rowners[rank];
2913   rend   = rowners[rank+1];
2914 
2915   /* distribute row lengths to all processors */
2916   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2917   if (!rank) {
2918     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2919     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2920     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2921     for (j=0; j<m; j++) {
2922       procsnz[0] += ourlens[j];
2923     }
2924     for (i=1; i<size; i++) {
2925       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2926       /* calculate the number of nonzeros on each processor */
2927       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2928         procsnz[i] += rowlengths[j];
2929       }
2930       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2931     }
2932     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2933   } else {
2934     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2935   }
2936 
2937   if (!rank) {
2938     /* determine max buffer needed and allocate it */
2939     maxnz = 0;
2940     for (i=0; i<size; i++) {
2941       maxnz = PetscMax(maxnz,procsnz[i]);
2942     }
2943     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2944 
2945     /* read in my part of the matrix column indices  */
2946     nz   = procsnz[0];
2947     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2948     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2949 
2950     /* read in everyone else's part and ship it off */
2951     for (i=1; i<size; i++) {
2952       nz   = procsnz[i];
2953       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2954       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2955     }
2956     ierr = PetscFree(cols);CHKERRQ(ierr);
2957   } else {
2958     /* determine buffer space needed for message */
2959     nz = 0;
2960     for (i=0; i<m; i++) {
2961       nz += ourlens[i];
2962     }
2963     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2964 
2965     /* receive message of column indices */
2966     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2967   }
2968 
2969   /* determine column ownership if matrix is not square */
2970   if (N != M) {
2971     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2972     else n = newMat->cmap->n;
2973     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2974     cstart = cend - n;
2975   } else {
2976     cstart = rstart;
2977     cend   = rend;
2978     n      = cend - cstart;
2979   }
2980 
2981   /* loop over local rows, determining number of off diagonal entries */
2982   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2983   jj   = 0;
2984   for (i=0; i<m; i++) {
2985     for (j=0; j<ourlens[i]; j++) {
2986       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2987       jj++;
2988     }
2989   }
2990 
2991   for (i=0; i<m; i++) {
2992     ourlens[i] -= offlens[i];
2993   }
2994   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2995 
2996   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2997 
2998   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2999 
3000   for (i=0; i<m; i++) {
3001     ourlens[i] += offlens[i];
3002   }
3003 
3004   if (!rank) {
3005     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3006 
3007     /* read in my part of the matrix numerical values  */
3008     nz   = procsnz[0];
3009     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3010 
3011     /* insert into matrix */
3012     jj      = rstart;
3013     smycols = mycols;
3014     svals   = vals;
3015     for (i=0; i<m; i++) {
3016       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3017       smycols += ourlens[i];
3018       svals   += ourlens[i];
3019       jj++;
3020     }
3021 
3022     /* read in other processors and ship out */
3023     for (i=1; i<size; i++) {
3024       nz   = procsnz[i];
3025       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3026       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3027     }
3028     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3029   } else {
3030     /* receive numeric values */
3031     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3032 
3033     /* receive message of values */
3034     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3035 
3036     /* insert into matrix */
3037     jj      = rstart;
3038     smycols = mycols;
3039     svals   = vals;
3040     for (i=0; i<m; i++) {
3041       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3042       smycols += ourlens[i];
3043       svals   += ourlens[i];
3044       jj++;
3045     }
3046   }
3047   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3048   ierr = PetscFree(vals);CHKERRQ(ierr);
3049   ierr = PetscFree(mycols);CHKERRQ(ierr);
3050   ierr = PetscFree(rowners);CHKERRQ(ierr);
3051   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3052   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3053   PetscFunctionReturn(0);
3054 }
3055 
3056 #undef __FUNCT__
3057 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3058 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */
3059 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3060 {
3061   PetscErrorCode ierr;
3062   IS             iscol_local;
3063   PetscInt       csize;
3064 
3065   PetscFunctionBegin;
3066   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3067   if (call == MAT_REUSE_MATRIX) {
3068     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3069     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3070   } else {
3071     /* check if we are grabbing all columns*/
3072     PetscBool    isstride;
3073     PetscMPIInt  lisstride = 0,gisstride;
3074     ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3075     if (isstride) {
3076       PetscInt  start,len,mstart,mlen;
3077       ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3078       ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3079       ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3080       if (mstart == start && mlen-mstart == len) lisstride = 1;
3081     }
3082     ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3083     if (gisstride) {
3084       PetscInt N;
3085       ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3086       ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3087       ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3088       ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3089     } else {
3090       PetscInt cbs;
3091       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3092       ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3093       ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3094     }
3095   }
3096   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3097   if (call == MAT_INITIAL_MATRIX) {
3098     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3099     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3100   }
3101   PetscFunctionReturn(0);
3102 }
3103 
3104 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3105 #undef __FUNCT__
3106 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3107 /*
3108     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3109   copy, and then the final result obtained by concatenating the local matrices.
3110   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3111 
3112   Note: This requires a sequential iscol with all indices.
3113 */
3114 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3115 {
3116   PetscErrorCode ierr;
3117   PetscMPIInt    rank,size;
3118   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3119   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3120   PetscBool      allcolumns, colflag;
3121   Mat            M,Mreuse;
3122   MatScalar      *vwork,*aa;
3123   MPI_Comm       comm;
3124   Mat_SeqAIJ     *aij;
3125 
3126   PetscFunctionBegin;
3127   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3128   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3129   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3130 
3131   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3132   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3133   if (colflag && ncol == mat->cmap->N) {
3134     allcolumns = PETSC_TRUE;
3135     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr);
3136   } else {
3137     allcolumns = PETSC_FALSE;
3138   }
3139   if (call ==  MAT_REUSE_MATRIX) {
3140     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3141     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3142     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3143   } else {
3144     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3145   }
3146 
3147   /*
3148       m - number of local rows
3149       n - number of columns (same on all processors)
3150       rstart - first row in new global matrix generated
3151   */
3152   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3153   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3154   if (call == MAT_INITIAL_MATRIX) {
3155     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3156     ii  = aij->i;
3157     jj  = aij->j;
3158 
3159     /*
3160         Determine the number of non-zeros in the diagonal and off-diagonal
3161         portions of the matrix in order to do correct preallocation
3162     */
3163 
3164     /* first get start and end of "diagonal" columns */
3165     if (csize == PETSC_DECIDE) {
3166       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3167       if (mglobal == n) { /* square matrix */
3168         nlocal = m;
3169       } else {
3170         nlocal = n/size + ((n % size) > rank);
3171       }
3172     } else {
3173       nlocal = csize;
3174     }
3175     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3176     rstart = rend - nlocal;
3177     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3178 
3179     /* next, compute all the lengths */
3180     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3181     olens = dlens + m;
3182     for (i=0; i<m; i++) {
3183       jend = ii[i+1] - ii[i];
3184       olen = 0;
3185       dlen = 0;
3186       for (j=0; j<jend; j++) {
3187         if (*jj < rstart || *jj >= rend) olen++;
3188         else dlen++;
3189         jj++;
3190       }
3191       olens[i] = olen;
3192       dlens[i] = dlen;
3193     }
3194     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3195     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3196     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3197     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3198     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3199     ierr = PetscFree(dlens);CHKERRQ(ierr);
3200   } else {
3201     PetscInt ml,nl;
3202 
3203     M    = *newmat;
3204     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3205     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3206     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3207     /*
3208          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3209        rather than the slower MatSetValues().
3210     */
3211     M->was_assembled = PETSC_TRUE;
3212     M->assembled     = PETSC_FALSE;
3213   }
3214   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3215   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3216   ii   = aij->i;
3217   jj   = aij->j;
3218   aa   = aij->a;
3219   for (i=0; i<m; i++) {
3220     row   = rstart + i;
3221     nz    = ii[i+1] - ii[i];
3222     cwork = jj;     jj += nz;
3223     vwork = aa;     aa += nz;
3224     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3225   }
3226 
3227   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3228   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3229   *newmat = M;
3230 
3231   /* save submatrix used in processor for next request */
3232   if (call ==  MAT_INITIAL_MATRIX) {
3233     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3234     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3235   }
3236   PetscFunctionReturn(0);
3237 }
3238 
3239 #undef __FUNCT__
3240 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3241 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3242 {
3243   PetscInt       m,cstart, cend,j,nnz,i,d;
3244   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3245   const PetscInt *JJ;
3246   PetscScalar    *values;
3247   PetscErrorCode ierr;
3248 
3249   PetscFunctionBegin;
3250   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3251 
3252   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3253   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3254   m      = B->rmap->n;
3255   cstart = B->cmap->rstart;
3256   cend   = B->cmap->rend;
3257   rstart = B->rmap->rstart;
3258 
3259   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3260 
3261 #if defined(PETSC_USE_DEBUGGING)
3262   for (i=0; i<m; i++) {
3263     nnz = Ii[i+1]- Ii[i];
3264     JJ  = J + Ii[i];
3265     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3266     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3267     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3268   }
3269 #endif
3270 
3271   for (i=0; i<m; i++) {
3272     nnz     = Ii[i+1]- Ii[i];
3273     JJ      = J + Ii[i];
3274     nnz_max = PetscMax(nnz_max,nnz);
3275     d       = 0;
3276     for (j=0; j<nnz; j++) {
3277       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3278     }
3279     d_nnz[i] = d;
3280     o_nnz[i] = nnz - d;
3281   }
3282   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3283   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3284 
3285   if (v) values = (PetscScalar*)v;
3286   else {
3287     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3288   }
3289 
3290   for (i=0; i<m; i++) {
3291     ii   = i + rstart;
3292     nnz  = Ii[i+1]- Ii[i];
3293     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3294   }
3295   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3296   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3297 
3298   if (!v) {
3299     ierr = PetscFree(values);CHKERRQ(ierr);
3300   }
3301   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3302   PetscFunctionReturn(0);
3303 }
3304 
3305 #undef __FUNCT__
3306 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3307 /*@
3308    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3309    (the default parallel PETSc format).
3310 
3311    Collective on MPI_Comm
3312 
3313    Input Parameters:
3314 +  B - the matrix
3315 .  i - the indices into j for the start of each local row (starts with zero)
3316 .  j - the column indices for each local row (starts with zero)
3317 -  v - optional values in the matrix
3318 
3319    Level: developer
3320 
3321    Notes:
3322        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3323      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3324      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3325 
3326        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3327 
3328        The format used for the sparse matrix input is equivalent to a
3329     row-major ordering, i.e. for the following matrix, the input data expected is
3330     as shown below:
3331 
3332 $        1 0 0
3333 $        2 0 3     P0
3334 $       -------
3335 $        4 5 6     P1
3336 $
3337 $     Process0 [P0]: rows_owned=[0,1]
3338 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3339 $        j =  {0,0,2}  [size = 3]
3340 $        v =  {1,2,3}  [size = 3]
3341 $
3342 $     Process1 [P1]: rows_owned=[2]
3343 $        i =  {0,3}    [size = nrow+1  = 1+1]
3344 $        j =  {0,1,2}  [size = 3]
3345 $        v =  {4,5,6}  [size = 3]
3346 
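   A minimal calling sketch for process 0 of the example above (assuming B has already been created,
   e.g. with MatCreate(), MatSetSizes(B,2,PETSC_DECIDE,3,3) and MatSetType(B,MATMPIAIJ); process 1
   makes the analogous call with its own local arrays):
.vb
   PetscInt    i[] = {0,1,3};
   PetscInt    j[] = {0,0,2};
   PetscScalar v[] = {1,2,3};
   ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
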
3347 .keywords: matrix, aij, compressed row, sparse, parallel
3348 
3349 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3350           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3351 @*/
3352 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3353 {
3354   PetscErrorCode ierr;
3355 
3356   PetscFunctionBegin;
3357   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3358   PetscFunctionReturn(0);
3359 }
3360 
3361 #undef __FUNCT__
3362 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3363 /*@C
3364    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3365    (the default parallel PETSc format).  For good matrix assembly performance
3366    the user should preallocate the matrix storage by setting the parameters
3367    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3368    performance can be increased by more than a factor of 50.
3369 
3370    Collective on MPI_Comm
3371 
3372    Input Parameters:
3373 +  B - the matrix
3374 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3375            (same value is used for all local rows)
3376 .  d_nnz - array containing the number of nonzeros in the various rows of the
3377            DIAGONAL portion of the local submatrix (possibly different for each row)
3378            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3379            The size of this array is equal to the number of local rows, i.e 'm'.
3380            For matrices that will be factored, you must leave room for (and set)
3381            the diagonal entry even if it is zero.
3382 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3383            submatrix (same value is used for all local rows).
3384 -  o_nnz - array containing the number of nonzeros in the various rows of the
3385            OFF-DIAGONAL portion of the local submatrix (possibly different for
3386            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3387            structure. The size of this array is equal to the number
3388            of local rows, i.e 'm'.
3389 
3390    If the *_nnz parameter is given then the *_nz parameter is ignored
3391 
3392    The AIJ format (also called the Yale sparse matrix format or
3393    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3394    storage.  The stored row and column indices begin with zero.
3395    See Users-Manual: ch_mat for details.
3396 
3397    The parallel matrix is partitioned such that the first m0 rows belong to
3398    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3399    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3400 
3401    The DIAGONAL portion of the local submatrix of a processor can be defined
3402    as the submatrix which is obtained by extracting the part corresponding to
3403    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3404    first row that belongs to the processor, r2 is the last row belonging to
3405    this processor, and c1-c2 is the range of indices of the local part of a
3406    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3407    common case of a square matrix, the row and column ranges are the same and
3408    the DIAGONAL part is also square. The remaining portion of the local
3409    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3410 
3411    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3412 
3413    You can call MatGetInfo() to get information on how effective the preallocation was;
3414    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3415    You can also run with the option -info and look for messages with the string
3416    malloc in them to see if additional memory allocation was needed.
3417 
3418    Example usage:
3419 
3420    Consider the following 8x8 matrix with 34 non-zero values, that is
3421    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3422    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3423    as follows:
3424 
3425 .vb
3426             1  2  0  |  0  3  0  |  0  4
3427     Proc0   0  5  6  |  7  0  0  |  8  0
3428             9  0 10  | 11  0  0  | 12  0
3429     -------------------------------------
3430            13  0 14  | 15 16 17  |  0  0
3431     Proc1   0 18  0  | 19 20 21  |  0  0
3432             0  0  0  | 22 23  0  | 24  0
3433     -------------------------------------
3434     Proc2  25 26 27  |  0  0 28  | 29  0
3435            30  0  0  | 31 32 33  |  0 34
3436 .ve
3437 
3438    This can be represented as a collection of submatrices as:
3439 
3440 .vb
3441       A B C
3442       D E F
3443       G H I
3444 .ve
3445 
3446    Where the submatrices A,B,C are owned by proc0, D,E,F are
3447    owned by proc1, G,H,I are owned by proc2.
3448 
3449    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3450    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3451    The 'M','N' parameters are 8,8, and have the same values on all procs.
3452 
3453    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3454    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3455    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3456    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3457    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3458    matrix, and [DF] as another SeqAIJ matrix.
3459 
3460    When d_nz, o_nz parameters are specified, d_nz storage elements are
3461    allocated for every row of the local diagonal submatrix, and o_nz
3462    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3463    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local
3464    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3465    In this case, the values of d_nz,o_nz are:
3466 .vb
3467      proc0 : dnz = 2, o_nz = 2
3468      proc1 : dnz = 3, o_nz = 2
3469      proc2 : dnz = 1, o_nz = 4
3470 .ve
3471    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3472    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3473    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3474    34 values.
3475 
3476    When d_nnz, o_nnz parameters are specified, the storage is specified
3477    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3478    In the above case the values for d_nnz,o_nnz are:
3479 .vb
3480      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3481      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3482      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3483 .ve
3484    Here the space allocated is the sum of all the above values, i.e. 34, and
3485    hence pre-allocation is perfect.
3486 
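   A minimal sketch of the corresponding preallocation call on proc0 (assuming A has already been
   created, e.g. with MatCreate(), MatSetSizes(A,3,3,8,8) and MatSetType(A,MATMPIAIJ); the other
   processes make the analogous call with their own d_nnz/o_nnz arrays):
.vb
   PetscInt d_nnz[] = {2,2,2};
   PetscInt o_nnz[] = {2,2,2};
   ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
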
3487    Level: intermediate
3488 
3489 .keywords: matrix, aij, compressed row, sparse, parallel
3490 
3491 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3492           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3493 @*/
3494 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3495 {
3496   PetscErrorCode ierr;
3497 
3498   PetscFunctionBegin;
3499   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3500   PetscValidType(B,1);
3501   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3502   PetscFunctionReturn(0);
3503 }
3504 
3505 #undef __FUNCT__
3506 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3507 /*@
3508      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
3509          in standard CSR format.
3510 
3511    Collective on MPI_Comm
3512 
3513    Input Parameters:
3514 +  comm - MPI communicator
3515 .  m - number of local rows (Cannot be PETSC_DECIDE)
3516 .  n - This value should be the same as the local size used in creating the
3517        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3518        calculated if N is given) For square matrices n is almost always m.
3519 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3520 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3521 .   i - row indices
3522 .   j - column indices
3523 -   a - matrix values
3524 
3525    Output Parameter:
3526 .   mat - the matrix
3527 
3528    Level: intermediate
3529 
3530    Notes:
3531        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3532      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3533      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3534 
3535        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3536 
3537        The format used for the sparse matrix input is equivalent to a
3538     row-major ordering, i.e. for the following matrix, the input data expected is
3539     as shown below:
3540 
3541 $        1 0 0
3542 $        2 0 3     P0
3543 $       -------
3544 $        4 5 6     P1
3545 $
3546 $     Process0 [P0]: rows_owned=[0,1]
3547 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3548 $        j =  {0,0,2}  [size = 3]
3549 $        v =  {1,2,3}  [size = 3]
3550 $
3551 $     Process1 [P1]: rows_owned=[2]
3552 $        i =  {0,3}    [size = nrow+1  = 1+1]
3553 $        j =  {0,1,2}  [size = 3]
3554 $        v =  {4,5,6}  [size = 3]
3555 
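   A minimal calling sketch for process 0 of the example above (the local row count and the i, j, v
   arrays are taken from the data shown; process 1 makes the analogous call with its own arrays, and
   mat and ierr are assumed to be declared elsewhere):
.vb
   PetscInt    i[] = {0,1,3};
   PetscInt    j[] = {0,0,2};
   PetscScalar v[] = {1,2,3};
   ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&mat);CHKERRQ(ierr);
.ve
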
3556 .keywords: matrix, aij, compressed row, sparse, parallel
3557 
3558 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3559           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3560 @*/
3561 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3562 {
3563   PetscErrorCode ierr;
3564 
3565   PetscFunctionBegin;
3566   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3567   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3568   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3569   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3570   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3571   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3572   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3573   PetscFunctionReturn(0);
3574 }
3575 
3576 #undef __FUNCT__
3577 #define __FUNCT__ "MatCreateAIJ"
3578 /*@C
3579    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3580    (the default parallel PETSc format).  For good matrix assembly performance
3581    the user should preallocate the matrix storage by setting the parameters
3582    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3583    performance can be increased by more than a factor of 50.
3584 
3585    Collective on MPI_Comm
3586 
3587    Input Parameters:
3588 +  comm - MPI communicator
3589 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3590            This value should be the same as the local size used in creating the
3591            y vector for the matrix-vector product y = Ax.
3592 .  n - This value should be the same as the local size used in creating the
3593        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3594        calculated if N is given) For square matrices n is almost always m.
3595 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3596 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3597 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3598            (same value is used for all local rows)
3599 .  d_nnz - array containing the number of nonzeros in the various rows of the
3600            DIAGONAL portion of the local submatrix (possibly different for each row)
3601            or NULL, if d_nz is used to specify the nonzero structure.
3602            The size of this array is equal to the number of local rows, i.e 'm'.
3603 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3604            submatrix (same value is used for all local rows).
3605 -  o_nnz - array containing the number of nonzeros in the various rows of the
3606            OFF-DIAGONAL portion of the local submatrix (possibly different for
3607            each row) or NULL, if o_nz is used to specify the nonzero
3608            structure. The size of this array is equal to the number
3609            of local rows, i.e 'm'.
3610 
3611    Output Parameter:
3612 .  A - the matrix
3613 
3614    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3615    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3616    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
3617 
3618    Notes:
3619    If the *_nnz parameter is given then the *_nz parameter is ignored
3620 
3621    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3622    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3623    storage requirements for this matrix.
3624 
3625    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3626    processor then it must be used on all processors that share the object for
3627    that argument.
3628 
3629    The user MUST specify either the local or global matrix dimensions
3630    (possibly both).
3631 
3632    The parallel matrix is partitioned across processors such that the
3633    first m0 rows belong to process 0, the next m1 rows belong to
3634    process 1, the next m2 rows belong to process 2 etc.. where
3635    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
3636    values corresponding to an [m x N] submatrix.
3637 
3638    The columns are logically partitioned with the n0 columns belonging
3639    to 0th partition, the next n1 columns belonging to the next
3640    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3641 
3642    The DIAGONAL portion of the local submatrix on any given processor
3643    is the submatrix corresponding to the rows and columns m,n
3644    corresponding to the given processor. i.e diagonal matrix on
3645    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3646    etc. The remaining portion of the local submatrix [m x (N-n)]
3647    constitute the OFF-DIAGONAL portion. The example below better
3648    illustrates this concept.
3649 
3650    For a square global matrix we define each processor's diagonal portion
3651    to be its local rows and the corresponding columns (a square submatrix);
3652    each processor's off-diagonal portion encompasses the remainder of the
3653    local matrix (a rectangular submatrix).
3654 
3655    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3656 
3657    When calling this routine with a single process communicator, a matrix of
3658    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3659    type of communicator, use the construction mechanism:
3660      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3661 
3662    By default, this format uses inodes (identical nodes) when possible.
3663    We search for consecutive rows with the same nonzero structure, thereby
3664    reusing matrix information to achieve increased efficiency.
3665 
3666    Options Database Keys:
3667 +  -mat_no_inode  - Do not use inodes
3668 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3669 -  -mat_aij_oneindex - Internally use indexing starting at 1
3670         rather than 0.  Note that when calling MatSetValues(),
3671         the user still MUST index entries starting at 0!
3672 
3673 
3674    Example usage:
3675 
3676    Consider the following 8x8 matrix with 34 non-zero values, that is
3677    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3678    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3679    as follows:
3680 
3681 .vb
3682             1  2  0  |  0  3  0  |  0  4
3683     Proc0   0  5  6  |  7  0  0  |  8  0
3684             9  0 10  | 11  0  0  | 12  0
3685     -------------------------------------
3686            13  0 14  | 15 16 17  |  0  0
3687     Proc1   0 18  0  | 19 20 21  |  0  0
3688             0  0  0  | 22 23  0  | 24  0
3689     -------------------------------------
3690     Proc2  25 26 27  |  0  0 28  | 29  0
3691            30  0  0  | 31 32 33  |  0 34
3692 .ve
3693 
3694    This can be represented as a collection of submatrices as:
3695 
3696 .vb
3697       A B C
3698       D E F
3699       G H I
3700 .ve
3701 
3702    Where the submatrices A,B,C are owned by proc0, D,E,F are
3703    owned by proc1, G,H,I are owned by proc2.
3704 
3705    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3706    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3707    The 'M','N' parameters are 8,8, and have the same values on all procs.
3708 
3709    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3710    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3711    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3712    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3713    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3714    matrix, and [DF] as another SeqAIJ matrix.
3715 
3716    When d_nz, o_nz parameters are specified, d_nz storage elements are
3717    allocated for every row of the local diagonal submatrix, and o_nz
3718    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3719    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local
3720    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3721    In this case, the values of d_nz,o_nz are:
3722 .vb
3723      proc0 : dnz = 2, o_nz = 2
3724      proc1 : dnz = 3, o_nz = 2
3725      proc2 : dnz = 1, o_nz = 4
3726 .ve
3727    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3728    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3729    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3730    34 values.
3731 
3732    When d_nnz, o_nnz parameters are specified, the storage is specified
3733    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3734    In the above case the values for d_nnz,o_nnz are:
3735 .vb
3736      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3737      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3738      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3739 .ve
3740    Here the space allocated is the sum of all the above values, i.e. 34, and
3741    hence pre-allocation is perfect.
3742 
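   A minimal sketch of the corresponding call on proc0 of the example above (the other processes pass
   their own local sizes and nnz arrays; A and ierr are assumed to be declared elsewhere):
.vb
   PetscInt d_nnz[] = {2,2,2};
   PetscInt o_nnz[] = {2,2,2};
   ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
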
3743    Level: intermediate
3744 
3745 .keywords: matrix, aij, compressed row, sparse, parallel
3746 
3747 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3748           MPIAIJ, MatCreateMPIAIJWithArrays()
3749 @*/
3750 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3751 {
3752   PetscErrorCode ierr;
3753   PetscMPIInt    size;
3754 
3755   PetscFunctionBegin;
3756   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3757   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3758   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3759   if (size > 1) {
3760     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3761     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3762   } else {
3763     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3764     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3765   }
3766   PetscFunctionReturn(0);
3767 }
3768 
3769 #undef __FUNCT__
3770 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3771 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3772 {
3773   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3774 
3775   PetscFunctionBegin;
3776   if (Ad)     *Ad     = a->A;
3777   if (Ao)     *Ao     = a->B;
3778   if (colmap) *colmap = a->garray;
3779   PetscFunctionReturn(0);
3780 }
3781 
3782 #undef __FUNCT__
3783 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3784 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3785 {
3786   PetscErrorCode ierr;
3787   PetscInt       i;
3788   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3789 
3790   PetscFunctionBegin;
3791   if (coloring->ctype == IS_COLORING_GLOBAL) {
3792     ISColoringValue *allcolors,*colors;
3793     ISColoring      ocoloring;
3794 
3795     /* set coloring for diagonal portion */
3796     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3797 
3798     /* set coloring for off-diagonal portion */
3799     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
3800     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3801     for (i=0; i<a->B->cmap->n; i++) {
3802       colors[i] = allcolors[a->garray[i]];
3803     }
3804     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3805     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3806     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3807     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3808   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3809     ISColoringValue *colors;
3810     PetscInt        *larray;
3811     ISColoring      ocoloring;
3812 
3813     /* set coloring for diagonal portion */
3814     ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr);
3815     for (i=0; i<a->A->cmap->n; i++) {
3816       larray[i] = i + A->cmap->rstart;
3817     }
3818     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
3819     ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr);
3820     for (i=0; i<a->A->cmap->n; i++) {
3821       colors[i] = coloring->colors[larray[i]];
3822     }
3823     ierr = PetscFree(larray);CHKERRQ(ierr);
3824     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3825     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3826     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3827 
3828     /* set coloring for off-diagonal portion */
3829     ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr);
3830     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
3831     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3832     for (i=0; i<a->B->cmap->n; i++) {
3833       colors[i] = coloring->colors[larray[i]];
3834     }
3835     ierr = PetscFree(larray);CHKERRQ(ierr);
3836     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3837     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3838     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3839   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3840   PetscFunctionReturn(0);
3841 }
3842 
3843 #undef __FUNCT__
3844 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3845 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3846 {
3847   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3848   PetscErrorCode ierr;
3849 
3850   PetscFunctionBegin;
3851   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3852   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3853   PetscFunctionReturn(0);
3854 }
3855 
3856 #undef __FUNCT__
3857 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
3858 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3859 {
3860   PetscErrorCode ierr;
3861   PetscInt       m,N,i,rstart,nnz,Ii;
3862   PetscInt       *indx;
3863   PetscScalar    *values;
3864 
3865   PetscFunctionBegin;
3866   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3867   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3868     PetscInt       *dnz,*onz,sum,bs,cbs;
3869 
3870     if (n == PETSC_DECIDE) {
3871       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3872     }
3873     /* Check sum(n) = N */
3874     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3875     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",N);
3876 
3877     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3878     rstart -= m;
3879 
3880     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3881     for (i=0; i<m; i++) {
3882       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3883       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3884       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3885     }
3886 
3887     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3888     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3889     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3890     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3891     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3892     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3893     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3894   }
3895 
3896   /* numeric phase */
3897   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3898   for (i=0; i<m; i++) {
3899     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3900     Ii   = i + rstart;
3901     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3902     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3903   }
3904   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3905   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3906   PetscFunctionReturn(0);
3907 }
3908 
3909 #undef __FUNCT__
3910 #define __FUNCT__ "MatFileSplit"
3911 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3912 {
3913   PetscErrorCode    ierr;
3914   PetscMPIInt       rank;
3915   PetscInt          m,N,i,rstart,nnz;
3916   size_t            len;
3917   const PetscInt    *indx;
3918   PetscViewer       out;
3919   char              *name;
3920   Mat               B;
3921   const PetscScalar *values;
3922 
3923   PetscFunctionBegin;
3924   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3925   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3926   /* Should this be the type of the diagonal block of A? */
3927   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3928   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3929   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3930   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3931   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3932   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3933   for (i=0; i<m; i++) {
3934     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3935     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3936     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3937   }
3938   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3939   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3940 
3941   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3942   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3943   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
3944   sprintf(name,"%s.%d",outfile,rank);
3945   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3946   ierr = PetscFree(name);CHKERRQ(ierr);
3947   ierr = MatView(B,out);CHKERRQ(ierr);
3948   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3949   ierr = MatDestroy(&B);CHKERRQ(ierr);
3950   PetscFunctionReturn(0);
3951 }
3952 
3953 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3954 #undef __FUNCT__
3955 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3956 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3957 {
3958   PetscErrorCode      ierr;
3959   Mat_Merge_SeqsToMPI *merge;
3960   PetscContainer      container;
3961 
3962   PetscFunctionBegin;
3963   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3964   if (container) {
3965     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3966     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3967     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3968     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3969     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3970     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3971     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3972     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3973     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3974     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3975     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3976     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3977     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3978     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3979     ierr = PetscFree(merge);CHKERRQ(ierr);
3980     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3981   }
3982   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3983   PetscFunctionReturn(0);
3984 }
3985 
3986 #include <../src/mat/utils/freespace.h>
3987 #include <petscbt.h>
3988 
3989 #undef __FUNCT__
3990 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
3991 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
3992 {
3993   PetscErrorCode      ierr;
3994   MPI_Comm            comm;
3995   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
3996   PetscMPIInt         size,rank,taga,*len_s;
3997   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
3998   PetscInt            proc,m;
3999   PetscInt            **buf_ri,**buf_rj;
4000   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4001   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4002   MPI_Request         *s_waits,*r_waits;
4003   MPI_Status          *status;
4004   MatScalar           *aa=a->a;
4005   MatScalar           **abuf_r,*ba_i;
4006   Mat_Merge_SeqsToMPI *merge;
4007   PetscContainer      container;
4008 
4009   PetscFunctionBegin;
4010   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4011   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4012 
4013   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4014   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4015 
4016   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4017   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4018 
4019   bi     = merge->bi;
4020   bj     = merge->bj;
4021   buf_ri = merge->buf_ri;
4022   buf_rj = merge->buf_rj;
4023 
4024   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4025   owners = merge->rowmap->range;
4026   len_s  = merge->len_s;
4027 
4028   /* send and recv matrix values */
4029   /*-----------------------------*/
4030   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4031   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4032 
4033   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4034   for (proc=0,k=0; proc<size; proc++) {
4035     if (!len_s[proc]) continue;
4036     i    = owners[proc];
4037     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4038     k++;
4039   }
4040 
4041   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4042   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4043   ierr = PetscFree(status);CHKERRQ(ierr);
4044 
4045   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4046   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4047 
4048   /* insert mat values of mpimat */
4049   /*----------------------------*/
4050   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4051   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4052 
4053   for (k=0; k<merge->nrecv; k++) {
4054     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4055     nrows       = *(buf_ri_k[k]);
4056     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of the k-th received i-structure */
4057     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4058   }
4059 
4060   /* set values of ba */
4061   m = merge->rowmap->n;
4062   for (i=0; i<m; i++) {
4063     arow = owners[rank] + i;
4064     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4065     bnzi = bi[i+1] - bi[i];
4066     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4067 
4068     /* add local non-zero vals of this proc's seqmat into ba */
4069     anzi   = ai[arow+1] - ai[arow];
4070     aj     = a->j + ai[arow];
4071     aa     = a->a + ai[arow];
4072     nextaj = 0;
4073     for (j=0; nextaj<anzi; j++) {
4074       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4075         ba_i[j] += aa[nextaj++];
4076       }
4077     }
4078 
4079     /* add received vals into ba */
4080     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4081       /* i-th row */
4082       if (i == *nextrow[k]) {
4083         anzi   = *(nextai[k]+1) - *nextai[k];
4084         aj     = buf_rj[k] + *(nextai[k]);
4085         aa     = abuf_r[k] + *(nextai[k]);
4086         nextaj = 0;
4087         for (j=0; nextaj<anzi; j++) {
4088           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4089             ba_i[j] += aa[nextaj++];
4090           }
4091         }
4092         nextrow[k]++; nextai[k]++;
4093       }
4094     }
4095     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4096   }
4097   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4098   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4099 
4100   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4101   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4102   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4103   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4104   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4105   PetscFunctionReturn(0);
4106 }
4107 
4108 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4109 
4110 #undef __FUNCT__
4111 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4112 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4113 {
4114   PetscErrorCode      ierr;
4115   Mat                 B_mpi;
4116   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4117   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4118   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4119   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4120   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4121   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4122   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4123   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4124   MPI_Status          *status;
4125   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4126   PetscBT             lnkbt;
4127   Mat_Merge_SeqsToMPI *merge;
4128   PetscContainer      container;
4129 
4130   PetscFunctionBegin;
4131   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4132 
4133   /* make sure it is a PETSc comm */
4134   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4135   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4136   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4137 
4138   ierr = PetscNew(&merge);CHKERRQ(ierr);
4139   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4140 
4141   /* determine row ownership */
4142   /*---------------------------------------------------------*/
4143   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4144   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4145   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4146   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4147   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4148   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4149   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4150 
4151   m      = merge->rowmap->n;
4152   owners = merge->rowmap->range;
4153 
4154   /* determine the number of messages to send, their lengths */
4155   /*---------------------------------------------------------*/
4156   len_s = merge->len_s;
4157 
4158   len          = 0; /* length of buf_si[] */
4159   merge->nsend = 0;
4160   for (proc=0; proc<size; proc++) {
4161     len_si[proc] = 0;
4162     if (proc == rank) {
4163       len_s[proc] = 0;
4164     } else {
4165       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4166       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4167     }
4168     if (len_s[proc]) {
4169       merge->nsend++;
4170       nrows = 0;
4171       for (i=owners[proc]; i<owners[proc+1]; i++) {
4172         if (ai[i+1] > ai[i]) nrows++;
4173       }
4174       len_si[proc] = 2*(nrows+1);
4175       len         += len_si[proc];
4176     }
4177   }
4178 
4179   /* determine the number and length of messages to receive for ij-structure */
4180   /*-------------------------------------------------------------------------*/
4181   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4182   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4183 
4184   /* post the Irecv of j-structure */
4185   /*-------------------------------*/
4186   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4187   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4188 
4189   /* post the Isend of j-structure */
4190   /*--------------------------------*/
4191   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4192 
4193   for (proc=0, k=0; proc<size; proc++) {
4194     if (!len_s[proc]) continue;
4195     i    = owners[proc];
4196     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4197     k++;
4198   }
4199 
4200   /* receives and sends of j-structure are complete */
4201   /*------------------------------------------------*/
4202   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4203   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4204 
4205   /* send and recv i-structure */
4206   /*---------------------------*/
4207   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4208   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4209 
4210   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4211   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4212   for (proc=0,k=0; proc<size; proc++) {
4213     if (!len_s[proc]) continue;
4214     /* form outgoing message for i-structure:
4215          buf_si[0]:                 nrows to be sent
4216                [1:nrows]:           row index (local to the destination process)
4217                [nrows+1:2*nrows+1]: i-structure index
4218     */
4219     /*-------------------------------------------*/
4220     nrows       = len_si[proc]/2 - 1;
4221     buf_si_i    = buf_si + nrows+1;
4222     buf_si[0]   = nrows;
4223     buf_si_i[0] = 0;
4224     nrows       = 0;
4225     for (i=owners[proc]; i<owners[proc+1]; i++) {
4226       anzi = ai[i+1] - ai[i];
4227       if (anzi) {
4228         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4229         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4230         nrows++;
4231       }
4232     }
4233     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4234     k++;
4235     buf_si += len_si[proc];
4236   }
4237 
4238   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4239   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4240 
4241   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4242   for (i=0; i<merge->nrecv; i++) {
4243     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4244   }
4245 
4246   ierr = PetscFree(len_si);CHKERRQ(ierr);
4247   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4248   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4249   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4250   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4251   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4252   ierr = PetscFree(status);CHKERRQ(ierr);
4253 
4254   /* compute a local seq matrix in each processor */
4255   /*----------------------------------------------*/
4256   /* allocate bi array and free space for accumulating nonzero column info */
4257   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4258   bi[0] = 0;
4259 
4260   /* create and initialize a linked list */
4261   nlnk = N+1;
4262   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4263 
4264   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4265   len  = ai[owners[rank+1]] - ai[owners[rank]];
4266   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4267 
4268   current_space = free_space;
4269 
4270   /* determine symbolic info for each local row */
4271   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4272 
4273   for (k=0; k<merge->nrecv; k++) {
4274     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4275     nrows       = *buf_ri_k[k];
4276     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of the k-th received i-structure */
4277     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4278   }
4279 
4280   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4281   len  = 0;
4282   for (i=0; i<m; i++) {
4283     bnzi = 0;
4284     /* add local non-zero cols of this proc's seqmat into lnk */
4285     arow  = owners[rank] + i;
4286     anzi  = ai[arow+1] - ai[arow];
4287     aj    = a->j + ai[arow];
4288     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4289     bnzi += nlnk;
4290     /* add received col data into lnk */
4291     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4292       if (i == *nextrow[k]) { /* i-th row */
4293         anzi  = *(nextai[k]+1) - *nextai[k];
4294         aj    = buf_rj[k] + *nextai[k];
4295         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4296         bnzi += nlnk;
4297         nextrow[k]++; nextai[k]++;
4298       }
4299     }
4300     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4301 
4302     /* if free space is not available, make more free space */
4303     if (current_space->local_remaining<bnzi) {
4304       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4305       nspacedouble++;
4306     }
4307     /* copy data into free space, then initialize lnk */
4308     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4309     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4310 
4311     current_space->array           += bnzi;
4312     current_space->local_used      += bnzi;
4313     current_space->local_remaining -= bnzi;
4314 
4315     bi[i+1] = bi[i] + bnzi;
4316   }
4317 
4318   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4319 
4320   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4321   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4322   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4323 
4324   /* create symbolic parallel matrix B_mpi */
4325   /*---------------------------------------*/
4326   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4327   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4328   if (n==PETSC_DECIDE) {
4329     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4330   } else {
4331     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4332   }
4333   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4334   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4335   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4336   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4337   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4338 
4339   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4340   B_mpi->assembled    = PETSC_FALSE;
4341   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4342   merge->bi           = bi;
4343   merge->bj           = bj;
4344   merge->buf_ri       = buf_ri;
4345   merge->buf_rj       = buf_rj;
4346   merge->coi          = NULL;
4347   merge->coj          = NULL;
4348   merge->owners_co    = NULL;
4349 
4350   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4351 
4352   /* attach the supporting struct to B_mpi for reuse */
4353   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4354   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4355   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4356   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4357   *mpimat = B_mpi;
4358 
4359   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4360   PetscFunctionReturn(0);
4361 }
4362 
4363 #undef __FUNCT__
4364 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4365 /*@C
4366       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential
4367                  matrices from each processor
4368 
4369     Collective on MPI_Comm
4370 
4371    Input Parameters:
4372 +    comm - the communicator the parallel matrix will live on
4373 .    seqmat - the input sequential matrix on each process
4374 .    m - number of local rows (or PETSC_DECIDE)
4375 .    n - number of local columns (or PETSC_DECIDE)
4376 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4377 
4378    Output Parameter:
4379 .    mpimat - the parallel matrix generated
4380 
4381     Level: advanced
4382 
4383    Notes:
4384      The dimensions of the sequential matrix in each processor MUST be the same.
4385      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4386      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
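
     Example usage (a sketch; assumes every process has already assembled its own SeqAIJ
     matrix seqmat, all with the same global dimensions):
.vb
     Mat seqmat,mpimat;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
.ve
     The second call shows the MAT_REUSE_MATRIX form, which re-sums after only the numerical
     values of seqmat have changed.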
4387 @*/
4388 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4389 {
4390   PetscErrorCode ierr;
4391   PetscMPIInt    size;
4392 
4393   PetscFunctionBegin;
4394   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4395   if (size == 1) {
4396     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4397     if (scall == MAT_INITIAL_MATRIX) {
4398       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4399     } else {
4400       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4401     }
4402     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4403     PetscFunctionReturn(0);
4404   }
4405   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4406   if (scall == MAT_INITIAL_MATRIX) {
4407     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4408   }
4409   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4410   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4411   PetscFunctionReturn(0);
4412 }
4413 
4414 #undef __FUNCT__
4415 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4416 /*@
4417      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4418           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4419           with MatGetSize()
4420 
4421     Not Collective
4422 
4423    Input Parameters:
4424 +    A - the matrix
4425 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4426 
4427    Output Parameter:
4428 .    A_loc - the local sequential matrix generated
4429 
4430     Level: developer
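
   Example usage (a sketch; A is assumed to be an assembled MPIAIJ matrix):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
   The MAT_REUSE_MATRIX call refreshes A_loc after the values of A change while its
   nonzero pattern stays fixed.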
4431 
4432 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4433 
4434 @*/
4435 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4436 {
4437   PetscErrorCode ierr;
4438   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4439   Mat_SeqAIJ     *mat,*a,*b;
4440   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4441   MatScalar      *aa,*ba,*cam;
4442   PetscScalar    *ca;
4443   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4444   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4445   PetscBool      match;
4446   MPI_Comm       comm;
4447   PetscMPIInt    size;
4448 
4449   PetscFunctionBegin;
4450   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4451   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4452   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4453   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4454   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4455 
4456   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4457   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4458   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4459   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4460   aa = a->a; ba = b->a;
4461   if (scall == MAT_INITIAL_MATRIX) {
4462     if (size == 1) {
4463       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4464       PetscFunctionReturn(0);
4465     }
4466 
4467     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4468     ci[0] = 0;
4469     for (i=0; i<am; i++) {
4470       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4471     }
4472     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4473     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4474     k    = 0;
4475     for (i=0; i<am; i++) {
4476       ncols_o = bi[i+1] - bi[i];
4477       ncols_d = ai[i+1] - ai[i];
4478       /* off-diagonal portion of A */
4479       for (jo=0; jo<ncols_o; jo++) {
4480         col = cmap[*bj];
4481         if (col >= cstart) break;
4482         cj[k]   = col; bj++;
4483         ca[k++] = *ba++;
4484       }
4485       /* diagonal portion of A */
4486       for (j=0; j<ncols_d; j++) {
4487         cj[k]   = cstart + *aj++;
4488         ca[k++] = *aa++;
4489       }
4490       /* off-diagonal portion of A */
4491       for (j=jo; j<ncols_o; j++) {
4492         cj[k]   = cmap[*bj++];
4493         ca[k++] = *ba++;
4494       }
4495     }
4496     /* put together the new matrix */
4497     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4498     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4499     /* Since these are PETSc arrays, change flags to free them as necessary. */
4500     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4501     mat->free_a  = PETSC_TRUE;
4502     mat->free_ij = PETSC_TRUE;
4503     mat->nonew   = 0;
4504   } else if (scall == MAT_REUSE_MATRIX) {
4505     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4506     ci = mat->i; cj = mat->j; cam = mat->a;
4507     for (i=0; i<am; i++) {
4508       /* off-diagonal portion of A */
4509       ncols_o = bi[i+1] - bi[i];
4510       for (jo=0; jo<ncols_o; jo++) {
4511         col = cmap[*bj];
4512         if (col >= cstart) break;
4513         *cam++ = *ba++; bj++;
4514       }
4515       /* diagonal portion of A */
4516       ncols_d = ai[i+1] - ai[i];
4517       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4518       /* off-diagonal portion of A */
4519       for (j=jo; j<ncols_o; j++) {
4520         *cam++ = *ba++; bj++;
4521       }
4522     }
4523   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4524   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4525   PetscFunctionReturn(0);
4526 }
4527 
4528 #undef __FUNCT__
4529 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4530 /*@C
4531      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4532 
4533     Not Collective
4534 
4535    Input Parameters:
4536 +    A - the matrix
4537 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4538 -    row, col - index sets of rows and columns to extract (or NULL)
4539 
4540    Output Parameter:
4541 .    A_loc - the local sequential matrix generated
4542 
4543     Level: developer
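
   Example usage (a sketch; passing NULL for row and col selects all local rows and the
   locally nonzero columns):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve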
4544 
4545 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4546 
4547 @*/
4548 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4549 {
4550   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4551   PetscErrorCode ierr;
4552   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4553   IS             isrowa,iscola;
4554   Mat            *aloc;
4555   PetscBool      match;
4556 
4557   PetscFunctionBegin;
4558   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4559   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4560   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4561   if (!row) {
4562     start = A->rmap->rstart; end = A->rmap->rend;
4563     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4564   } else {
4565     isrowa = *row;
4566   }
4567   if (!col) {
4568     start = A->cmap->rstart;
4569     cmap  = a->garray;
4570     nzA   = a->A->cmap->n;
4571     nzB   = a->B->cmap->n;
4572     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4573     ncols = 0;
4574     for (i=0; i<nzB; i++) {
4575       if (cmap[i] < start) idx[ncols++] = cmap[i];
4576       else break;
4577     }
4578     imark = i;
4579     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4580     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4581     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4582   } else {
4583     iscola = *col;
4584   }
4585   if (scall != MAT_INITIAL_MATRIX) {
4586     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4587     aloc[0] = *A_loc;
4588   }
4589   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4590   *A_loc = aloc[0];
4591   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4592   if (!row) {
4593     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4594   }
4595   if (!col) {
4596     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4597   }
4598   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4599   PetscFunctionReturn(0);
4600 }
4601 
4602 #undef __FUNCT__
4603 #define __FUNCT__ "MatGetBrowsOfAcols"
4604 /*@C
4605     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4606 
4607     Collective on Mat
4608 
4609    Input Parameters:
4610 +    A,B - the matrices in mpiaij format
4611 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4612 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4613 
4614    Output Parameters:
4615 +    rowb, colb - index sets of rows and columns of B to extract
4616 -    B_seq - the sequential matrix generated
4617 
4618     Level: developer
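
   Example usage (a sketch; on the MAT_INITIAL_MATRIX call the routine creates rowb and colb,
   which the caller keeps for any later MAT_REUSE_MATRIX call and eventually destroys):
.vb
     IS  rowb,colb;
     Mat B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve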
4619 
4620 @*/
4621 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4622 {
4623   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4624   PetscErrorCode ierr;
4625   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4626   IS             isrowb,iscolb;
4627   Mat            *bseq=NULL;
4628 
4629   PetscFunctionBegin;
4630   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4631     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4632   }
4633   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4634 
4635   if (scall == MAT_INITIAL_MATRIX) {
4636     start = A->cmap->rstart;
4637     cmap  = a->garray;
4638     nzA   = a->A->cmap->n;
4639     nzB   = a->B->cmap->n;
4640     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4641     ncols = 0;
4642     for (i=0; i<nzB; i++) {  /* row < local row index */
4643       if (cmap[i] < start) idx[ncols++] = cmap[i];
4644       else break;
4645     }
4646     imark = i;
4647     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4648     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4649     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4650     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4651   } else {
4652     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4653     isrowb  = *rowb; iscolb = *colb;
4654     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4655     bseq[0] = *B_seq;
4656   }
4657   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4658   *B_seq = bseq[0];
4659   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4660   if (!rowb) {
4661     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4662   } else {
4663     *rowb = isrowb;
4664   }
4665   if (!colb) {
4666     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4667   } else {
4668     *colb = iscolb;
4669   }
4670   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4671   PetscFunctionReturn(0);
4672 }
4673 
4674 #undef __FUNCT__
4675 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4676 /*
4677     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4678     of the OFF-DIAGONAL portion of local A
4679 
4680     Collective on Mat
4681 
4682    Input Parameters:
4683 +    A,B - the matrices in mpiaij format
4684 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4685 
4686    Output Parameters:
4687 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4688 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4689 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4690 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4691 
4692     Level: developer
4693 
4694 */
4695 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4696 {
4697   VecScatter_MPI_General *gen_to,*gen_from;
4698   PetscErrorCode         ierr;
4699   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4700   Mat_SeqAIJ             *b_oth;
4701   VecScatter             ctx =a->Mvctx;
4702   MPI_Comm               comm;
4703   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4704   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4705   PetscScalar            *rvalues,*svalues;
4706   MatScalar              *b_otha,*bufa,*bufA;
4707   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4708   MPI_Request            *rwaits = NULL,*swaits = NULL;
4709   MPI_Status             *sstatus,rstatus;
4710   PetscMPIInt            jj,size;
4711   PetscInt               *cols,sbs,rbs;
4712   PetscScalar            *vals;
4713 
4714   PetscFunctionBegin;
4715   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4716   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4717 
4718   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4719     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4720   }
4721   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4722   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4723 
4724   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4725   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4726   rvalues  = gen_from->values; /* will hold the lengths of the rows to be received */
4727   svalues  = gen_to->values;   /* will hold the lengths of the rows to be sent */
4728   nrecvs   = gen_from->n;
4729   nsends   = gen_to->n;
4730 
4731   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4732   srow    = gen_to->indices;    /* local row index to be sent */
4733   sstarts = gen_to->starts;
4734   sprocs  = gen_to->procs;
4735   sstatus = gen_to->sstatus;
4736   sbs     = gen_to->bs;
4737   rstarts = gen_from->starts;
4738   rprocs  = gen_from->procs;
4739   rbs     = gen_from->bs;
4740 
4741   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4742   if (scall == MAT_INITIAL_MATRIX) {
4743     /* i-array */
4744     /*---------*/
4745     /*  post receives */
4746     for (i=0; i<nrecvs; i++) {
4747       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4748       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4749       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4750     }
4751 
4752     /* pack the outgoing message */
4753     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4754 
4755     sstartsj[0] = 0;
4756     rstartsj[0] = 0;
4757     len         = 0; /* total length of j or a array to be sent */
4758     k           = 0;
4759     for (i=0; i<nsends; i++) {
4760       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4761       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4762       for (j=0; j<nrows; j++) {
4763         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4764         for (l=0; l<sbs; l++) {
4765           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4766 
4767           rowlen[j*sbs+l] = ncols;
4768 
4769           len += ncols;
4770           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4771         }
4772         k++;
4773       }
4774       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4775 
4776       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4777     }
4778     /* recvs and sends of i-array are completed */
4779     i = nrecvs;
4780     while (i--) {
4781       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4782     }
4783     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4784 
4785     /* allocate buffers for sending j and a arrays */
4786     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4787     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4788 
4789     /* create i-array of B_oth */
4790     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4791 
4792     b_othi[0] = 0;
4793     len       = 0; /* total length of j or a array to be received */
4794     k         = 0;
4795     for (i=0; i<nrecvs; i++) {
4796       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4797       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4798       for (j=0; j<nrows; j++) {
4799         b_othi[k+1] = b_othi[k] + rowlen[j];
4800         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
4801         k++;
4802       }
4803       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4804     }
4805 
4806     /* allocate space for the j and a arrays of B_oth */
4807     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4808     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4809 
4810     /* j-array */
4811     /*---------*/
4812     /*  post receives of j-array */
4813     for (i=0; i<nrecvs; i++) {
4814       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4815       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4816     }
4817 
4818     /* pack the outgoing message j-array */
4819     k = 0;
4820     for (i=0; i<nsends; i++) {
4821       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4822       bufJ  = bufj+sstartsj[i];
4823       for (j=0; j<nrows; j++) {
4824         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4825         for (ll=0; ll<sbs; ll++) {
4826           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4827           for (l=0; l<ncols; l++) {
4828             *bufJ++ = cols[l];
4829           }
4830           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4831         }
4832       }
4833       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4834     }
4835 
4836     /* recvs and sends of j-array are completed */
4837     i = nrecvs;
4838     while (i--) {
4839       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4840     }
4841     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4842   } else if (scall == MAT_REUSE_MATRIX) {
4843     sstartsj = *startsj_s;
4844     rstartsj = *startsj_r;
4845     bufa     = *bufa_ptr;
4846     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4847     b_otha   = b_oth->a;
4848   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
4849 
4850   /* a-array */
4851   /*---------*/
4852   /*  post receives of a-array */
4853   for (i=0; i<nrecvs; i++) {
4854     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4855     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4856   }
4857 
4858   /* pack the outgoing message a-array */
4859   k = 0;
4860   for (i=0; i<nsends; i++) {
4861     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4862     bufA  = bufa+sstartsj[i];
4863     for (j=0; j<nrows; j++) {
4864       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4865       for (ll=0; ll<sbs; ll++) {
4866         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4867         for (l=0; l<ncols; l++) {
4868           *bufA++ = vals[l];
4869         }
4870         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4871       }
4872     }
4873     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4874   }
4875   /* recvs and sends of a-array are completed */
4876   i = nrecvs;
4877   while (i--) {
4878     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4879   }
4880   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4881   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4882 
4883   if (scall == MAT_INITIAL_MATRIX) {
4884     /* put together the new matrix */
4885     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4886 
4887     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4888     /* Since these are PETSc arrays, change flags to free them as necessary. */
4889     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4890     b_oth->free_a  = PETSC_TRUE;
4891     b_oth->free_ij = PETSC_TRUE;
4892     b_oth->nonew   = 0;
4893 
4894     ierr = PetscFree(bufj);CHKERRQ(ierr);
4895     if (!startsj_s || !bufa_ptr) {
4896       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4897       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4898       ierr = PetscFree(bufa);CHKERRQ(ierr);
4899       *startsj_s = sstartsj;
4900       *startsj_r = rstartsj;
4901       *bufa_ptr  = bufa;
4902     }
4903   }
4904   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4905   PetscFunctionReturn(0);
4906 }
4907 
4908 #undef __FUNCT__
4909 #define __FUNCT__ "MatGetCommunicationStructs"
4910 /*@C
4911   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4912 
4913   Not Collective
4914 
4915   Input Parameter:
4916 . A - The matrix in mpiaij format
4917 
4918   Output Parameters:
4919 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4920 . colmap - A map from global column index to local index into lvec
4921 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4922 
4923   Level: developer
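
  Example usage (a sketch; the type of colmap depends on whether PETSc was configured with
  PETSC_USE_CTABLE):
.vb
     Vec        lvec;
     VecScatter Mvctx;
  #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
  #else
     PetscInt   *colmap;
  #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
  The returned objects are owned by the matrix and must not be destroyed by the caller.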
4924 
4925 @*/
4926 #if defined(PETSC_USE_CTABLE)
4927 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4928 #else
4929 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4930 #endif
4931 {
4932   Mat_MPIAIJ *a;
4933 
4934   PetscFunctionBegin;
4935   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4936   PetscValidPointer(lvec, 2);
4937   PetscValidPointer(colmap, 3);
4938   PetscValidPointer(multScatter, 4);
4939   a = (Mat_MPIAIJ*) A->data;
4940   if (lvec) *lvec = a->lvec;
4941   if (colmap) *colmap = a->colmap;
4942   if (multScatter) *multScatter = a->Mvctx;
4943   PetscFunctionReturn(0);
4944 }
4945 
4946 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4947 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4948 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4949 #if defined(PETSC_HAVE_ELEMENTAL)
4950 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4951 #endif
4952 
4953 #undef __FUNCT__
4954 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4955 /*
4956     Computes (B'*A')' since computing A*B directly is untenable
4957 
4958                n                       p                          p
4959         (              )       (              )         (                  )
4960       m (      A       )  *  n (       B      )   =   m (         C        )
4961         (              )       (              )         (                  )
4962 
4963 */
4964 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4965 {
4966   PetscErrorCode ierr;
4967   Mat            At,Bt,Ct;
4968 
4969   PetscFunctionBegin;
4970   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4971   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4972   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4973   ierr = MatDestroy(&At);CHKERRQ(ierr);
4974   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4975   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4976   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4977   PetscFunctionReturn(0);
4978 }
4979 
4980 #undef __FUNCT__
4981 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4982 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4983 {
4984   PetscErrorCode ierr;
4985   PetscInt       m=A->rmap->n,n=B->cmap->n;
4986   Mat            Cmat;
4987 
4988   PetscFunctionBegin;
4989   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
4990   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
4991   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4992   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
4993   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4994   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
4995   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4996   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4997 
4998   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
4999 
5000   *C = Cmat;
5001   PetscFunctionReturn(0);
5002 }
5003 
5004 /* ----------------------------------------------------------------*/
5005 #undef __FUNCT__
5006 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5007 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5008 {
5009   PetscErrorCode ierr;
5010 
5011   PetscFunctionBegin;
5012   if (scall == MAT_INITIAL_MATRIX) {
5013     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5014     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5015     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5016   }
5017   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5018   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5019   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5020   PetscFunctionReturn(0);
5021 }
5022 
5023 /*MC
5024    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5025 
5026    Options Database Keys:
5027 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5028 
5029   Level: beginner
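
   Example usage (a sketch of creating an MPIAIJ matrix explicitly; the sizes and
   preallocation values are placeholders):
.vb
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
.ve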
5030 
5031 .seealso: MatCreateAIJ()
5032 M*/
5033 
5034 #undef __FUNCT__
5035 #define __FUNCT__ "MatCreate_MPIAIJ"
5036 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5037 {
5038   Mat_MPIAIJ     *b;
5039   PetscErrorCode ierr;
5040   PetscMPIInt    size;
5041 
5042   PetscFunctionBegin;
5043   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5044 
5045   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5046   B->data       = (void*)b;
5047   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5048   B->assembled  = PETSC_FALSE;
5049   B->insertmode = NOT_SET_VALUES;
5050   b->size       = size;
5051 
5052   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5053 
5054   /* build cache for off array entries formed */
5055   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5056 
5057   b->donotstash  = PETSC_FALSE;
5058   b->colmap      = 0;
5059   b->garray      = 0;
5060   b->roworiented = PETSC_TRUE;
5061 
5062   /* stuff used for matrix vector multiply */
5063   b->lvec  = NULL;
5064   b->Mvctx = NULL;
5065 
5066   /* stuff for MatGetRow() */
5067   b->rowindices   = 0;
5068   b->rowvalues    = 0;
5069   b->getrowactive = PETSC_FALSE;
5070 
5071   /* flexible pointer used in CUSP/CUSPARSE classes */
5072   b->spptr = NULL;
5073 
5074   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5075   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5076   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5077   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5078   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5079   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5080   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5081   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5082   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5083   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5084   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5085 #if defined(PETSC_HAVE_ELEMENTAL)
5086   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5087 #endif
5088   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5089   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5090   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5091   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5092   PetscFunctionReturn(0);
5093 }
5094 
5095 #undef __FUNCT__
5096 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5097 /*@C
5098      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5099          and "off-diagonal" part of the matrix in CSR format.
5100 
5101    Collective on MPI_Comm
5102 
5103    Input Parameters:
5104 +  comm - MPI communicator
5105 .  m - number of local rows (Cannot be PETSC_DECIDE)
5106 .  n - This value should be the same as the local size used in creating the
5107        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5108        calculated if N is given). For square matrices n is almost always m.
5109 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5110 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5111 .   i - row indices for "diagonal" portion of matrix
5112 .   j - column indices
5113 .   a - matrix values
5114 .   oi - row indices for "off-diagonal" portion of matrix
5115 .   oj - column indices
5116 -   oa - matrix values
5117 
5118    Output Parameter:
5119 .   mat - the matrix
5120 
5121    Level: advanced
5122 
5123    Notes:
5124        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc.
5125        The user must not free these arrays until after the matrix has been destroyed.
5126 
5127        The i, j, oi, and oj indices are 0-based.
5128 
5129        See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix.
5130 
5131        This routine sets only local rows; it cannot be used to set values in rows owned by other processes.
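
       A minimal usage sketch with hypothetical data: rank 0 of a two-process run owning rows 0-1 of a
       4 x 4 tridiagonal matrix (the arrays must remain valid until the matrix is destroyed):

.vb
       PetscInt    i[]  = {0,2,4}, j[]  = {0,1,0,1};   /* "diagonal" block, local column numbering */
       PetscScalar a[]  = {2.0,-1.0,-1.0,2.0};
       PetscInt    oi[] = {0,0,1}, oj[] = {2};         /* "off-diagonal" block, global column numbering */
       PetscScalar oa[] = {-1.0};
       Mat         A;

       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve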
5132 
5133        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5134        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5135        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5136        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5137        keep track of the underlying arrays. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5138        communication if it is known that only local entries will be set.
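
       For reference, here is a sketch of the recommended MatSetValues()-based assembly; the preallocation
       numbers and the helper ComputeRowEntries(), which fills ncols, cols, and vals for one row, are
       hypothetical placeholders:

.vb
       ierr = MatCreateAIJ(PETSC_COMM_WORLD,m,n,M,N,5,NULL,5,NULL,&A);CHKERRQ(ierr);
       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       for (row=rstart; row<rend; row++) {
         ierr = ComputeRowEntries(row,&ncols,cols,vals);CHKERRQ(ierr);   /* user-provided */
         ierr = MatSetValues(A,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
       }
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve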
5139 
5140 .keywords: matrix, aij, compressed row, sparse, parallel
5141 
5142 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5143           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5144 @*/
5145 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5146 {
5147   PetscErrorCode ierr;
5148   Mat_MPIAIJ     *maij;
5149 
5150   PetscFunctionBegin;
5151   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5152   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5153   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5154   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5155   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5156   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5157   maij = (Mat_MPIAIJ*) (*mat)->data;
5158 
5159   (*mat)->preallocated = PETSC_TRUE;
5160 
5161   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5162   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5163 
5164   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5165   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5166 
5167   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5168   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5169   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5170   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5171 
5172   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5173   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5174   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5175   PetscFunctionReturn(0);
5176 }
5177 
5178 /*
5179     Special version for direct calls from Fortran
5180 */
5181 #include <petsc/private/fortranimpl.h>
5182 
5183 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5184 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5185 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5186 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5187 #endif
5188 
5189 /* Change these macros so they can be used in a void function: no error code can be returned here, so they abort instead of returning */
5190 #undef CHKERRQ
5191 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5192 #undef SETERRQ2
5193 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5194 #undef SETERRQ3
5195 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5196 #undef SETERRQ
5197 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5198 
5199 #undef __FUNCT__
5200 #define __FUNCT__ "matsetvaluesmpiaij_"
5201 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5202 {
5203   Mat            mat  = *mmat;
5204   PetscInt       m    = *mm, n = *mn;
5205   InsertMode     addv = *maddv;
5206   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5207   PetscScalar    value;
5208   PetscErrorCode ierr;
5209 
5210   MatCheckPreallocated(mat,1);
5211   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5212 
5213 #if defined(PETSC_USE_DEBUG)
5214   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5215 #endif
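  /* The body below mirrors MatSetValues_MPIAIJ(): locally owned rows are inserted directly into the
     "diagonal" (A) and "off-diagonal" (B) sequential blocks, while rows owned by other processes are
     buffered in the stash (unless stashing is disabled) and communicated during MatAssemblyBegin()/MatAssemblyEnd() */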
5216   {
5217     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5218     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5219     PetscBool roworiented = aij->roworiented;
5220 
5221     /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5222     Mat        A                 = aij->A;
5223     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5224     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5225     MatScalar  *aa               = a->a;
5226     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5227     Mat        B                 = aij->B;
5228     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5229     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5230     MatScalar  *ba               = b->a;
5231 
5232     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5233     PetscInt  nonew = a->nonew;
5234     MatScalar *ap1,*ap2;
5235 
5236     PetscFunctionBegin;
5237     for (i=0; i<m; i++) {
5238       if (im[i] < 0) continue;
5239 #if defined(PETSC_USE_DEBUG)
5240       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5241 #endif
5242       if (im[i] >= rstart && im[i] < rend) {
5243         row      = im[i] - rstart;
5244         lastcol1 = -1;
5245         rp1      = aj + ai[row];
5246         ap1      = aa + ai[row];
5247         rmax1    = aimax[row];
5248         nrow1    = ailen[row];
5249         low1     = 0;
5250         high1    = nrow1;
5251         lastcol2 = -1;
5252         rp2      = bj + bi[row];
5253         ap2      = ba + bi[row];
5254         rmax2    = bimax[row];
5255         nrow2    = bilen[row];
5256         low2     = 0;
5257         high2    = nrow2;
5258 
5259         for (j=0; j<n; j++) {
5260           if (roworiented) value = v[i*n+j];
5261           else value = v[i+j*m];
5262           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
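          /* columns in [cstart,cend) belong to the local "diagonal" block A; negative columns are
             skipped, and all remaining columns belong to the "off-diagonal" block B */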
5263           if (in[j] >= cstart && in[j] < cend) {
5264             col = in[j] - cstart;
5265             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5266           } else if (in[j] < 0) continue;
5267 #if defined(PETSC_USE_DEBUG)
5268           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5269 #endif
5270           else {
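            /* "off-diagonal" (B) entry: once the matrix has been assembled, B uses compacted local
               column indices, so translate the global column in[j] through colmap; if the column is
               not found and new nonzeros are allowed, MatDisAssemble_MPIAIJ() converts B back to
               global column indices */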
5271             if (mat->was_assembled) {
5272               if (!aij->colmap) {
5273                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5274               }
5275 #if defined(PETSC_USE_CTABLE)
5276               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5277               col--;
5278 #else
5279               col = aij->colmap[in[j]] - 1;
5280 #endif
5281               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5282                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5283                 col  =  in[j];
5284                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5285                 B     = aij->B;
5286                 b     = (Mat_SeqAIJ*)B->data;
5287                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5288                 ba    = b->a; /* must be reloaded before ap2 is computed, since MatDisAssemble_MPIAIJ() replaced B's value array */
5289                 rp2   = bj + bi[row];
5290                 ap2   = ba + bi[row];
5291                 rmax2 = bimax[row];
5292                 nrow2 = bilen[row];
5293                 low2  = 0;
5294                 high2 = nrow2;
5295                 bm    = aij->B->rmap->n;
5296               }
5297             } else col = in[j];
5298             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5299           }
5300         }
5301       } else if (!aij->donotstash) {
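        /* row im[i] belongs to another process: buffer the values in the matrix stash so they can be
           sent to the owning process during assembly */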
5302         if (roworiented) {
5303           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5304         } else {
5305           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5306         }
5307       }
5308     }
5309   }
5310   PetscFunctionReturnVoid();
5311 }
5312 
5313