xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e91ef88e73a631b88b8031ec3d9dd16cd2dcf0e1)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise.  As a result, for single process communicators,
13   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
14   for communicators controlling multiple processes.  It is recommended that you call both of
15   the above preallocation routines for simplicity.
16 
17    Options Database Keys:
18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19 
20   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
21    enough exist.
22 
23   Level: beginner
24 
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
26 M*/
27 
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30 
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34   for communicators controlling multiple processes.  It is recommended that you call both of
35   the above preallocation routines for simplicity.
36 
37    Options Database Keys:
38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39 
40   Level: beginner
41 
42 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
48 {
49   PetscErrorCode  ierr;
50   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
51   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
52   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
53   const PetscInt  *ia,*ib;
54   const MatScalar *aa,*bb;
55   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
56   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
57 
58   PetscFunctionBegin;
59   *keptrows = 0;
60   ia        = a->i;
61   ib        = b->i;
62   for (i=0; i<m; i++) {
63     na = ia[i+1] - ia[i];
64     nb = ib[i+1] - ib[i];
65     if (!na && !nb) {
66       cnt++;
67       goto ok1;
68     }
69     aa = a->a + ia[i];
70     for (j=0; j<na; j++) {
71       if (aa[j] != 0.0) goto ok1;
72     }
73     bb = b->a + ib[i];
74     for (j=0; j <nb; j++) {
75       if (bb[j] != 0.0) goto ok1;
76     }
77     cnt++;
78 ok1:;
79   }
80   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
81   if (!n0rows) PetscFunctionReturn(0);
82   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
83   cnt  = 0;
84   for (i=0; i<m; i++) {
85     na = ia[i+1] - ia[i];
86     nb = ib[i+1] - ib[i];
87     if (!na && !nb) continue;
88     aa = a->a + ia[i];
89     for (j=0; j<na;j++) {
90       if (aa[j] != 0.0) {
91         rows[cnt++] = rstart + i;
92         goto ok2;
93       }
94     }
95     bb = b->a + ib[i];
96     for (j=0; j<nb; j++) {
97       if (bb[j] != 0.0) {
98         rows[cnt++] = rstart + i;
99         goto ok2;
100       }
101     }
102 ok2:;
103   }
104   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
110 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
111 {
112   PetscErrorCode    ierr;
113   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
114 
115   PetscFunctionBegin;
116   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
117     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
118   } else {
119     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
128 {
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
130   PetscErrorCode ierr;
131   PetscInt       i,rstart,nrows,*rows;
132 
133   PetscFunctionBegin;
134   *zrows = NULL;
135   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
136   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
137   for (i=0; i<nrows; i++) rows[i] += rstart;
138   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
145 {
146   PetscErrorCode ierr;
147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
148   PetscInt       i,n,*garray = aij->garray;
149   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
150   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
151   PetscReal      *work;
152 
153   PetscFunctionBegin;
154   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
155   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
156   if (type == NORM_2) {
157     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
158       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
159     }
160     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
161       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
162     }
163   } else if (type == NORM_1) {
164     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
165       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
166     }
167     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
168       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
169     }
170   } else if (type == NORM_INFINITY) {
171     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
172       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
173     }
174     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
175       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
176     }
177 
178   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
179   if (type == NORM_INFINITY) {
180     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
181   } else {
182     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
183   }
184   ierr = PetscFree(work);CHKERRQ(ierr);
185   if (type == NORM_2) {
186     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
194 {
195   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
196   IS              sis,gis;
197   PetscErrorCode  ierr;
198   const PetscInt  *isis,*igis;
199   PetscInt        n,*iis,nsis,ngis,rstart,i;
200 
201   PetscFunctionBegin;
202   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
203   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
204   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
205   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
206   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
207   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
208 
209   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
210   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
211   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
212   n    = ngis + nsis;
213   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
214   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
215   for (i=0; i<n; i++) iis[i] += rstart;
216   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
217 
218   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
219   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
220   ierr = ISDestroy(&sis);CHKERRQ(ierr);
221   ierr = ISDestroy(&gis);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
225 #undef __FUNCT__
226 #define __FUNCT__ "MatDistribute_MPIAIJ"
227 /*
228     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
229     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
230 
231     Only for square matrices
232 
233     Used by a preconditioner, hence PETSC_EXTERN
234 */
235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
236 {
237   PetscMPIInt    rank,size;
238   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
239   PetscErrorCode ierr;
240   Mat            mat;
241   Mat_SeqAIJ     *gmata;
242   PetscMPIInt    tag;
243   MPI_Status     status;
244   PetscBool      aij;
245   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
246 
247   PetscFunctionBegin;
248   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
249   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
250   if (!rank) {
251     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
252     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
253   }
254   if (reuse == MAT_INITIAL_MATRIX) {
255     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
256     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
257     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
258     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
259     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
260     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
261     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
262     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
263     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
264 
265     rowners[0] = 0;
266     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
267     rstart = rowners[rank];
268     rend   = rowners[rank+1];
269     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
270     if (!rank) {
271       gmata = (Mat_SeqAIJ*) gmat->data;
272       /* send row lengths to all processors */
273       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
274       for (i=1; i<size; i++) {
275         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
276       }
277       /* determine number diagonal and off-diagonal counts */
278       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
279       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
280       jj   = 0;
281       for (i=0; i<m; i++) {
282         for (j=0; j<dlens[i]; j++) {
283           if (gmata->j[jj] < rstart) ld[i]++;
284           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
285           jj++;
286         }
287       }
288       /* send column indices to other processes */
289       for (i=1; i<size; i++) {
290         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
291         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
292         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293       }
294 
295       /* send numerical values to other processes */
296       for (i=1; i<size; i++) {
297         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
298         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
299       }
300       gmataa = gmata->a;
301       gmataj = gmata->j;
302 
303     } else {
304       /* receive row lengths */
305       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
306       /* receive column indices */
307       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
308       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
309       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* determine number diagonal and off-diagonal counts */
311       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
312       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
313       jj   = 0;
314       for (i=0; i<m; i++) {
315         for (j=0; j<dlens[i]; j++) {
316           if (gmataj[jj] < rstart) ld[i]++;
317           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
318           jj++;
319         }
320       }
321       /* receive numerical values */
322       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* set preallocation */
326     for (i=0; i<m; i++) {
327       dlens[i] -= olens[i];
328     }
329     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
330     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
331 
332     for (i=0; i<m; i++) {
333       dlens[i] += olens[i];
334     }
335     cnt = 0;
336     for (i=0; i<m; i++) {
337       row  = rstart + i;
338       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
339       cnt += dlens[i];
340     }
341     if (rank) {
342       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
343     }
344     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
345     ierr = PetscFree(rowners);CHKERRQ(ierr);
346 
347     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
348 
349     *inmat = mat;
350   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
351     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
352     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
353     mat  = *inmat;
354     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
355     if (!rank) {
356       /* send numerical values to other processes */
357       gmata  = (Mat_SeqAIJ*) gmat->data;
358       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
359       gmataa = gmata->a;
360       for (i=1; i<size; i++) {
361         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
362         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
363       }
364       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
365     } else {
366       /* receive numerical values from process 0*/
367       nz   = Ad->nz + Ao->nz;
368       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
369       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
370     }
371     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
372     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
373     ad = Ad->a;
374     ao = Ao->a;
375     if (mat->rmap->n) {
376       i  = 0;
377       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
378       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
379     }
380     for (i=1; i<mat->rmap->n; i++) {
381       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     i--;
385     if (mat->rmap->n) {
386       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
387     }
388     if (rank) {
389       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
390     }
391   }
392   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
393   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 /*
398   Local utility routine that creates a mapping from the global column
399 number to the local number in the off-diagonal part of the local
400 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
401 a slightly higher hash table cost; without it it is not scalable (each processor
402 has an order N integer array but is fast to acess.
403 */
404 #undef __FUNCT__
405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
407 {
408   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscInt       n = aij->B->cmap->n,i;
411 
412   PetscFunctionBegin;
413   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
414 #if defined(PETSC_USE_CTABLE)
415   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
416   for (i=0; i<n; i++) {
417     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
418   }
419 #else
420   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
421   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
422   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
423 #endif
424   PetscFunctionReturn(0);
425 }
426 
427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
428 { \
429     if (col <= lastcol1)  low1 = 0;     \
430     else                 high1 = nrow1; \
431     lastcol1 = col;\
432     while (high1-low1 > 5) { \
433       t = (low1+high1)/2; \
434       if (rp1[t] > col) high1 = t; \
435       else              low1  = t; \
436     } \
437       for (_i=low1; _i<high1; _i++) { \
438         if (rp1[_i] > col) break; \
439         if (rp1[_i] == col) { \
440           if (addv == ADD_VALUES) ap1[_i] += value;   \
441           else                    ap1[_i] = value; \
442           goto a_noinsert; \
443         } \
444       }  \
445       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
446       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
447       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
448       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
449       N = nrow1++ - 1; a->nz++; high1++; \
450       /* shift up all the later entries in this row */ \
451       for (ii=N; ii>=_i; ii--) { \
452         rp1[ii+1] = rp1[ii]; \
453         ap1[ii+1] = ap1[ii]; \
454       } \
455       rp1[_i] = col;  \
456       ap1[_i] = value;  \
457       A->nonzerostate++;\
458       a_noinsert: ; \
459       ailen[row] = nrow1; \
460 }
461 
462 
463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
464   { \
465     if (col <= lastcol2) low2 = 0;                        \
466     else high2 = nrow2;                                   \
467     lastcol2 = col;                                       \
468     while (high2-low2 > 5) {                              \
469       t = (low2+high2)/2;                                 \
470       if (rp2[t] > col) high2 = t;                        \
471       else             low2  = t;                         \
472     }                                                     \
473     for (_i=low2; _i<high2; _i++) {                       \
474       if (rp2[_i] > col) break;                           \
475       if (rp2[_i] == col) {                               \
476         if (addv == ADD_VALUES) ap2[_i] += value;         \
477         else                    ap2[_i] = value;          \
478         goto b_noinsert;                                  \
479       }                                                   \
480     }                                                     \
481     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
482     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
483     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
484     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
485     N = nrow2++ - 1; b->nz++; high2++;                    \
486     /* shift up all the later entries in this row */      \
487     for (ii=N; ii>=_i; ii--) {                            \
488       rp2[ii+1] = rp2[ii];                                \
489       ap2[ii+1] = ap2[ii];                                \
490     }                                                     \
491     rp2[_i] = col;                                        \
492     ap2[_i] = value;                                      \
493     B->nonzerostate++;                                    \
494     b_noinsert: ;                                         \
495     bilen[row] = nrow2;                                   \
496   }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
501 {
502   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
503   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
504   PetscErrorCode ierr;
505   PetscInt       l,*garray = mat->garray,diag;
506 
507   PetscFunctionBegin;
508   /* code only works for square matrices A */
509 
510   /* find size of row to the left of the diagonal part */
511   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
512   row  = row - diag;
513   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
514     if (garray[b->j[b->i[row]+l]] > diag) break;
515   }
516   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* diagonal part */
519   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
520 
521   /* right of diagonal part */
522   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatSetValues_MPIAIJ"
528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
529 {
530   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
531   PetscScalar    value;
532   PetscErrorCode ierr;
533   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
534   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
535   PetscBool      roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat        A                 = aij->A;
539   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
540   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
541   MatScalar  *aa               = a->a;
542   PetscBool  ignorezeroentries = a->ignorezeroentries;
543   Mat        B                 = aij->B;
544   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
545   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
546   MatScalar  *ba               = b->a;
547 
548   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
549   PetscInt  nonew;
550   MatScalar *ap1,*ap2;
551 
552   PetscFunctionBegin;
553   for (i=0; i<m; i++) {
554     if (im[i] < 0) continue;
555 #if defined(PETSC_USE_DEBUG)
556     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
557 #endif
558     if (im[i] >= rstart && im[i] < rend) {
559       row      = im[i] - rstart;
560       lastcol1 = -1;
561       rp1      = aj + ai[row];
562       ap1      = aa + ai[row];
563       rmax1    = aimax[row];
564       nrow1    = ailen[row];
565       low1     = 0;
566       high1    = nrow1;
567       lastcol2 = -1;
568       rp2      = bj + bi[row];
569       ap2      = ba + bi[row];
570       rmax2    = bimax[row];
571       nrow2    = bilen[row];
572       low2     = 0;
573       high2    = nrow2;
574 
575       for (j=0; j<n; j++) {
576         if (roworiented) value = v[i*n+j];
577         else             value = v[i+j*m];
578         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
579         if (in[j] >= cstart && in[j] < cend) {
580           col   = in[j] - cstart;
581           nonew = a->nonew;
582           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
583         } else if (in[j] < 0) continue;
584 #if defined(PETSC_USE_DEBUG)
585         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
586 #endif
587         else {
588           if (mat->was_assembled) {
589             if (!aij->colmap) {
590               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
591             }
592 #if defined(PETSC_USE_CTABLE)
593             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
594             col--;
595 #else
596             col = aij->colmap[in[j]] - 1;
597 #endif
598             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
599               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
600               col  =  in[j];
601               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
602               B     = aij->B;
603               b     = (Mat_SeqAIJ*)B->data;
604               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
605               rp2   = bj + bi[row];
606               ap2   = ba + bi[row];
607               rmax2 = bimax[row];
608               nrow2 = bilen[row];
609               low2  = 0;
610               high2 = nrow2;
611               bm    = aij->B->rmap->n;
612               ba    = b->a;
613             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614           } else col = in[j];
615           nonew = b->nonew;
616           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
617         }
618       }
619     } else {
620       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
621       if (!aij->donotstash) {
622         mat->assembled = PETSC_FALSE;
623         if (roworiented) {
624           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
625         } else {
626           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
627         }
628       }
629     }
630   }
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatGetValues_MPIAIJ"
636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
641   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
642 
643   PetscFunctionBegin;
644   for (i=0; i<m; i++) {
645     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
646     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
647     if (idxm[i] >= rstart && idxm[i] < rend) {
648       row = idxm[i] - rstart;
649       for (j=0; j<n; j++) {
650         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
651         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
652         if (idxn[j] >= cstart && idxn[j] < cend) {
653           col  = idxn[j] - cstart;
654           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
655         } else {
656           if (!aij->colmap) {
657             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
658           }
659 #if defined(PETSC_USE_CTABLE)
660           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
661           col--;
662 #else
663           col = aij->colmap[idxn[j]] - 1;
664 #endif
665           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
666           else {
667             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
668           }
669         }
670       }
671     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
681 {
682   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
683   PetscErrorCode ierr;
684   PetscInt       nstash,reallocs;
685   InsertMode     addv;
686 
687   PetscFunctionBegin;
688   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
689 
690   /* make sure all processors are either in INSERTMODE or ADDMODE */
691   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
692   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
693   mat->insertmode = addv; /* in case this processor had no cache */
694 
695   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
696   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
697   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
698   PetscFunctionReturn(0);
699 }
700 
701 #undef __FUNCT__
702 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
703 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
704 {
705   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
706   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
707   PetscErrorCode ierr;
708   PetscMPIInt    n;
709   PetscInt       i,j,rstart,ncols,flg;
710   PetscInt       *row,*col;
711   PetscBool      other_disassembled;
712   PetscScalar    *val;
713   InsertMode     addv = mat->insertmode;
714 
715   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
716 
717   PetscFunctionBegin;
718   if (!aij->donotstash && !mat->nooffprocentries) {
719     while (1) {
720       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
721       if (!flg) break;
722 
723       for (i=0; i<n; ) {
724         /* Now identify the consecutive vals belonging to the same row */
725         for (j=i,rstart=row[j]; j<n; j++) {
726           if (row[j] != rstart) break;
727         }
728         if (j < n) ncols = j-i;
729         else       ncols = n-i;
730         /* Now assemble all these values with a single function call */
731         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
732 
733         i = j;
734       }
735     }
736     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
737   }
738   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
739   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
740 
741   /* determine if any processor has disassembled, if so we must
742      also disassemble ourselfs, in order that we may reassemble. */
743   /*
744      if nonzero structure of submatrix B cannot change then we know that
745      no processor disassembled thus we can skip this stuff
746   */
747   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
748     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
749     if (mat->was_assembled && !other_disassembled) {
750       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
751     }
752   }
753   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
754     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
755   }
756   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
757   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
758   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
759 
760   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
761 
762   aij->rowvalues = 0;
763 
764   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
765   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
766 
767   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
768   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
769     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
770     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
771   }
772   PetscFunctionReturn(0);
773 }
774 
775 #undef __FUNCT__
776 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
777 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
778 {
779   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
780   PetscErrorCode ierr;
781 
782   PetscFunctionBegin;
783   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
784   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
785   PetscFunctionReturn(0);
786 }
787 
788 #undef __FUNCT__
789 #define __FUNCT__ "MatZeroRows_MPIAIJ"
790 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
791 {
792   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
793   PetscInt      *owners = A->rmap->range;
794   PetscInt       n      = A->rmap->n;
795   PetscSF        sf;
796   PetscInt      *lrows;
797   PetscSFNode   *rrows;
798   PetscInt       r, p = 0, len = 0;
799   PetscErrorCode ierr;
800 
801   PetscFunctionBegin;
802   /* Create SF where leaves are input rows and roots are owned rows */
803   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
804   for (r = 0; r < n; ++r) lrows[r] = -1;
805   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
806   for (r = 0; r < N; ++r) {
807     const PetscInt idx   = rows[r];
808     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
809     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
810       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
811     }
812     if (A->nooffproczerorows) {
813       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
814       lrows[len++] = idx - owners[p];
815     } else {
816       rrows[r].rank = p;
817       rrows[r].index = rows[r] - owners[p];
818     }
819   }
820   if (!A->nooffproczerorows) {
821     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
822     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
823     /* Collect flags for rows to be zeroed */
824     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
825     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
826     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
827     /* Compress and put in row numbers */
828     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
829   }
830   /* fix right hand side if needed */
831   if (x && b) {
832     const PetscScalar *xx;
833     PetscScalar       *bb;
834 
835     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
836     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
837     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
838     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
839     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
840   }
841   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
842   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
843   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
844     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
845   } else if (diag != 0.0) {
846     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
847     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
848     for (r = 0; r < len; ++r) {
849       const PetscInt row = lrows[r] + A->rmap->rstart;
850       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
851     }
852     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
853     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
854   } else {
855     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
856   }
857   ierr = PetscFree(lrows);CHKERRQ(ierr);
858 
859   /* only change matrix nonzero state if pattern was allowed to be changed */
860   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
861     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
862     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
863   }
864   PetscFunctionReturn(0);
865 }
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
869 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
870 {
871   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
872   PetscErrorCode    ierr;
873   PetscMPIInt       n = A->rmap->n;
874   PetscInt          i,j,r,m,p = 0,len = 0;
875   PetscInt          *lrows,*owners = A->rmap->range;
876   PetscSFNode       *rrows;
877   PetscSF           sf;
878   const PetscScalar *xx;
879   PetscScalar       *bb,*mask;
880   Vec               xmask,lmask;
881   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
882   const PetscInt    *aj, *ii,*ridx;
883   PetscScalar       *aa;
884 
885   PetscFunctionBegin;
886   /* Create SF where leaves are input rows and roots are owned rows */
887   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
888   for (r = 0; r < n; ++r) lrows[r] = -1;
889   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
890   for (r = 0; r < N; ++r) {
891     const PetscInt idx   = rows[r];
892     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
893     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
894       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
895     }
896     rrows[r].rank  = p;
897     rrows[r].index = rows[r] - owners[p];
898   }
899   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
900   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
901   /* Collect flags for rows to be zeroed */
902   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
903   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
904   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
905   /* Compress and put in row numbers */
906   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
907   /* zero diagonal part of matrix */
908   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
909   /* handle off diagonal part of matrix */
910   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
911   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
912   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
913   for (i=0; i<len; i++) bb[lrows[i]] = 1;
914   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
915   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
917   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
918   if (x) {
919     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
920     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
921     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
922     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
923   }
924   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
925   /* remove zeroed rows of off diagonal matrix */
926   ii = aij->i;
927   for (i=0; i<len; i++) {
928     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
929   }
930   /* loop over all elements of off process part of matrix zeroing removed columns*/
931   if (aij->compressedrow.use) {
932     m    = aij->compressedrow.nrows;
933     ii   = aij->compressedrow.i;
934     ridx = aij->compressedrow.rindex;
935     for (i=0; i<m; i++) {
936       n  = ii[i+1] - ii[i];
937       aj = aij->j + ii[i];
938       aa = aij->a + ii[i];
939 
940       for (j=0; j<n; j++) {
941         if (PetscAbsScalar(mask[*aj])) {
942           if (b) bb[*ridx] -= *aa*xx[*aj];
943           *aa = 0.0;
944         }
945         aa++;
946         aj++;
947       }
948       ridx++;
949     }
950   } else { /* do not use compressed row format */
951     m = l->B->rmap->n;
952     for (i=0; i<m; i++) {
953       n  = ii[i+1] - ii[i];
954       aj = aij->j + ii[i];
955       aa = aij->a + ii[i];
956       for (j=0; j<n; j++) {
957         if (PetscAbsScalar(mask[*aj])) {
958           if (b) bb[i] -= *aa*xx[*aj];
959           *aa = 0.0;
960         }
961         aa++;
962         aj++;
963       }
964     }
965   }
966   if (x) {
967     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
968     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
969   }
970   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
971   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
972   ierr = PetscFree(lrows);CHKERRQ(ierr);
973 
974   /* only change matrix nonzero state if pattern was allowed to be changed */
975   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
976     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
977     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
978   }
979   PetscFunctionReturn(0);
980 }
981 
982 #undef __FUNCT__
983 #define __FUNCT__ "MatMult_MPIAIJ"
984 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
985 {
986   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
987   PetscErrorCode ierr;
988   PetscInt       nt;
989 
990   PetscFunctionBegin;
991   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
992   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
993   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
994   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
995   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
996   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
997   PetscFunctionReturn(0);
998 }
999 
1000 #undef __FUNCT__
1001 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1002 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1003 {
1004   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1005   PetscErrorCode ierr;
1006 
1007   PetscFunctionBegin;
1008   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 #undef __FUNCT__
1013 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1014 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1015 {
1016   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1017   PetscErrorCode ierr;
1018 
1019   PetscFunctionBegin;
1020   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1021   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1022   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1023   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1029 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1030 {
1031   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1032   PetscErrorCode ierr;
1033   PetscBool      merged;
1034 
1035   PetscFunctionBegin;
1036   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1037   /* do nondiagonal part */
1038   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1039   if (!merged) {
1040     /* send it on its way */
1041     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1042     /* do local part */
1043     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1044     /* receive remote parts: note this assumes the values are not actually */
1045     /* added in yy until the next line, */
1046     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1047   } else {
1048     /* do local part */
1049     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1050     /* send it on its way */
1051     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1052     /* values actually were received in the Begin() but we need to call this nop */
1053     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1054   }
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 #undef __FUNCT__
1059 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1060 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1061 {
1062   MPI_Comm       comm;
1063   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1064   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1065   IS             Me,Notme;
1066   PetscErrorCode ierr;
1067   PetscInt       M,N,first,last,*notme,i;
1068   PetscMPIInt    size;
1069 
1070   PetscFunctionBegin;
1071   /* Easy test: symmetric diagonal block */
1072   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1073   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1074   if (!*f) PetscFunctionReturn(0);
1075   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1076   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1077   if (size == 1) PetscFunctionReturn(0);
1078 
1079   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1080   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1081   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1082   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1083   for (i=0; i<first; i++) notme[i] = i;
1084   for (i=last; i<M; i++) notme[i-last+first] = i;
1085   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1086   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1087   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1088   Aoff = Aoffs[0];
1089   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1090   Boff = Boffs[0];
1091   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1092   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1093   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1094   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1095   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1096   ierr = PetscFree(notme);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1102 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106 
1107   PetscFunctionBegin;
1108   /* do nondiagonal part */
1109   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1110   /* send it on its way */
1111   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1112   /* do local part */
1113   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1114   /* receive remote parts */
1115   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 /*
1120   This only works correctly for square matrices where the subblock A->A is the
1121    diagonal block
1122 */
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1125 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1126 {
1127   PetscErrorCode ierr;
1128   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1129 
1130   PetscFunctionBegin;
1131   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1132   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1133   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNCT__
1138 #define __FUNCT__ "MatScale_MPIAIJ"
1139 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1140 {
1141   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1142   PetscErrorCode ierr;
1143 
1144   PetscFunctionBegin;
1145   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1146   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatDestroy_MPIAIJ"
1152 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1153 {
1154   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1155   PetscErrorCode ierr;
1156 
1157   PetscFunctionBegin;
1158 #if defined(PETSC_USE_LOG)
1159   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1160 #endif
1161   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1162   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1163   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1164   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1165 #if defined(PETSC_USE_CTABLE)
1166   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1167 #else
1168   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1169 #endif
1170   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1171   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1172   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1173   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1174   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1175   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1176 
1177   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1179   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1181   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1182   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1186 #if defined(PETSC_HAVE_ELEMENTAL)
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1188 #endif
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 #undef __FUNCT__
1193 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1194 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1195 {
1196   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1197   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1198   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1199   PetscErrorCode ierr;
1200   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1201   int            fd;
1202   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1203   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1204   PetscScalar    *column_values;
1205   PetscInt       message_count,flowcontrolcount;
1206   FILE           *file;
1207 
1208   PetscFunctionBegin;
1209   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1210   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1211   nz   = A->nz + B->nz;
1212   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1213   if (!rank) {
1214     header[0] = MAT_FILE_CLASSID;
1215     header[1] = mat->rmap->N;
1216     header[2] = mat->cmap->N;
1217 
1218     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1219     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1220     /* get largest number of rows any processor has */
1221     rlen  = mat->rmap->n;
1222     range = mat->rmap->range;
1223     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1224   } else {
1225     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1226     rlen = mat->rmap->n;
1227   }
1228 
1229   /* load up the local row counts */
1230   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1231   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1232 
1233   /* store the row lengths to the file */
1234   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1235   if (!rank) {
1236     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1237     for (i=1; i<size; i++) {
1238       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1239       rlen = range[i+1] - range[i];
1240       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1241       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1242     }
1243     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1244   } else {
1245     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1246     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1247     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1248   }
1249   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1250 
1251   /* load up the local column indices */
1252   nzmax = nz; /* th processor needs space a largest processor needs */
1253   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1254   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1255   cnt   = 0;
1256   for (i=0; i<mat->rmap->n; i++) {
1257     for (j=B->i[i]; j<B->i[i+1]; j++) {
1258       if ((col = garray[B->j[j]]) > cstart) break;
1259       column_indices[cnt++] = col;
1260     }
1261     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1262     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1263   }
1264   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1265 
1266   /* store the column indices to the file */
1267   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1268   if (!rank) {
1269     MPI_Status status;
1270     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1271     for (i=1; i<size; i++) {
1272       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1273       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1274       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1275       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1276       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1277     }
1278     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1279   } else {
1280     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1281     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1282     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1283     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1284   }
1285   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1286 
1287   /* load up the local column values */
1288   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1289   cnt  = 0;
1290   for (i=0; i<mat->rmap->n; i++) {
1291     for (j=B->i[i]; j<B->i[i+1]; j++) {
1292       if (garray[B->j[j]] > cstart) break;
1293       column_values[cnt++] = B->a[j];
1294     }
1295     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1296     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1297   }
1298   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1299 
1300   /* store the column values to the file */
1301   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1302   if (!rank) {
1303     MPI_Status status;
1304     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1305     for (i=1; i<size; i++) {
1306       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1307       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1308       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1309       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1310       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1311     }
1312     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1313   } else {
1314     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1315     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1316     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1317     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1318   }
1319   ierr = PetscFree(column_values);CHKERRQ(ierr);
1320 
1321   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1322   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 #include <petscdraw.h>
1327 #undef __FUNCT__
1328 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1329 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1330 {
1331   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1332   PetscErrorCode    ierr;
1333   PetscMPIInt       rank = aij->rank,size = aij->size;
1334   PetscBool         isdraw,iascii,isbinary;
1335   PetscViewer       sviewer;
1336   PetscViewerFormat format;
1337 
1338   PetscFunctionBegin;
1339   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1340   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1341   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1342   if (iascii) {
1343     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1344     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1345       MatInfo   info;
1346       PetscBool inodes;
1347 
1348       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1349       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1350       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1351       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1352       if (!inodes) {
1353         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1354                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1355       } else {
1356         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1357                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1358       }
1359       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1360       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1361       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1362       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1363       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1364       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1365       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1366       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1367       PetscFunctionReturn(0);
1368     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1369       PetscInt inodecount,inodelimit,*inodes;
1370       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1371       if (inodes) {
1372         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1373       } else {
1374         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1375       }
1376       PetscFunctionReturn(0);
1377     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1378       PetscFunctionReturn(0);
1379     }
1380   } else if (isbinary) {
1381     if (size == 1) {
1382       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1383       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1384     } else {
1385       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1386     }
1387     PetscFunctionReturn(0);
1388   } else if (isdraw) {
1389     PetscDraw draw;
1390     PetscBool isnull;
1391     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1392     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1393   }
1394 
1395   {
1396     /* assemble the entire matrix onto first processor. */
1397     Mat        A;
1398     Mat_SeqAIJ *Aloc;
1399     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1400     MatScalar  *a;
1401 
1402     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1403     if (!rank) {
1404       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1405     } else {
1406       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1407     }
1408     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1409     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1410     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1411     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1412     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1413 
1414     /* copy over the A part */
1415     Aloc = (Mat_SeqAIJ*)aij->A->data;
1416     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1417     row  = mat->rmap->rstart;
1418     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1419     for (i=0; i<m; i++) {
1420       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1421       row++;
1422       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1423     }
1424     aj = Aloc->j;
1425     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1426 
1427     /* copy over the B part */
1428     Aloc = (Mat_SeqAIJ*)aij->B->data;
1429     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1430     row  = mat->rmap->rstart;
1431     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1432     ct   = cols;
1433     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1434     for (i=0; i<m; i++) {
1435       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1436       row++;
1437       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1438     }
1439     ierr = PetscFree(ct);CHKERRQ(ierr);
1440     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1441     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1442     /*
1443        Everyone has to call to draw the matrix since the graphics waits are
1444        synchronized across all processors that share the PetscDraw object
1445     */
1446     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1447     if (!rank) {
1448       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1449       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1450     }
1451     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1452     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1453     ierr = MatDestroy(&A);CHKERRQ(ierr);
1454   }
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatView_MPIAIJ"
1460 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1461 {
1462   PetscErrorCode ierr;
1463   PetscBool      iascii,isdraw,issocket,isbinary;
1464 
1465   PetscFunctionBegin;
1466   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1467   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1468   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1469   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1470   if (iascii || isdraw || isbinary || issocket) {
1471     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1472   }
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 #undef __FUNCT__
1477 #define __FUNCT__ "MatSOR_MPIAIJ"
1478 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1479 {
1480   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1481   PetscErrorCode ierr;
1482   Vec            bb1 = 0;
1483   PetscBool      hasop;
1484 
1485   PetscFunctionBegin;
1486   if (flag == SOR_APPLY_UPPER) {
1487     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1488     PetscFunctionReturn(0);
1489   }
1490 
1491   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1492     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1493   }
1494 
1495   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1496     if (flag & SOR_ZERO_INITIAL_GUESS) {
1497       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1498       its--;
1499     }
1500 
1501     while (its--) {
1502       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1503       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1504 
1505       /* update rhs: bb1 = bb - B*x */
1506       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1507       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1508 
1509       /* local sweep */
1510       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1511     }
1512   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1513     if (flag & SOR_ZERO_INITIAL_GUESS) {
1514       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1515       its--;
1516     }
1517     while (its--) {
1518       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1519       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1520 
1521       /* update rhs: bb1 = bb - B*x */
1522       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1523       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1524 
1525       /* local sweep */
1526       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1527     }
1528   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1529     if (flag & SOR_ZERO_INITIAL_GUESS) {
1530       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1531       its--;
1532     }
1533     while (its--) {
1534       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1535       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1536 
1537       /* update rhs: bb1 = bb - B*x */
1538       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1539       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1540 
1541       /* local sweep */
1542       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1543     }
1544   } else if (flag & SOR_EISENSTAT) {
1545     Vec xx1;
1546 
1547     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1548     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1549 
1550     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1551     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1552     if (!mat->diag) {
1553       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1554       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1555     }
1556     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1557     if (hasop) {
1558       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1559     } else {
1560       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1561     }
1562     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1563 
1564     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1565 
1566     /* local sweep */
1567     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1568     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1569     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1570   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1571 
1572   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1573   PetscFunctionReturn(0);
1574 }
1575 
1576 #undef __FUNCT__
1577 #define __FUNCT__ "MatPermute_MPIAIJ"
1578 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1579 {
1580   Mat            aA,aB,Aperm;
1581   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1582   PetscScalar    *aa,*ba;
1583   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1584   PetscSF        rowsf,sf;
1585   IS             parcolp = NULL;
1586   PetscBool      done;
1587   PetscErrorCode ierr;
1588 
1589   PetscFunctionBegin;
1590   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1591   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1592   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1593   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1594 
1595   /* Invert row permutation to find out where my rows should go */
1596   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1597   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1598   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1599   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1600   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1601   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1602 
1603   /* Invert column permutation to find out where my columns should go */
1604   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1605   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1606   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1607   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1608   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1609   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1610   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1611 
1612   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1613   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1614   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1615 
1616   /* Find out where my gcols should go */
1617   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1618   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1619   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1620   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1621   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1622   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1623   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1624   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1625 
1626   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1627   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1628   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1629   for (i=0; i<m; i++) {
1630     PetscInt row = rdest[i],rowner;
1631     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1632     for (j=ai[i]; j<ai[i+1]; j++) {
1633       PetscInt cowner,col = cdest[aj[j]];
1634       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1635       if (rowner == cowner) dnnz[i]++;
1636       else onnz[i]++;
1637     }
1638     for (j=bi[i]; j<bi[i+1]; j++) {
1639       PetscInt cowner,col = gcdest[bj[j]];
1640       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1641       if (rowner == cowner) dnnz[i]++;
1642       else onnz[i]++;
1643     }
1644   }
1645   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1646   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1647   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1648   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1649   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1650 
1651   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1652   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1653   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1654   for (i=0; i<m; i++) {
1655     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1656     PetscInt j0,rowlen;
1657     rowlen = ai[i+1] - ai[i];
1658     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1659       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1660       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1661     }
1662     rowlen = bi[i+1] - bi[i];
1663     for (j0=j=0; j<rowlen; j0=j) {
1664       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1665       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1666     }
1667   }
1668   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1669   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1670   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1671   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1672   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1673   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1674   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1675   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1676   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1677   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1678   *B = Aperm;
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 #undef __FUNCT__
1683 #define __FUNCT__ "MatGetGhosts_MPIAIJ"
1684 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1685 {
1686   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1687   PetscErrorCode ierr;
1688 
1689   PetscFunctionBegin;
1690   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1691   if (ghosts) *ghosts = aij->garray;
1692   PetscFunctionReturn(0);
1693 }
1694 
1695 #undef __FUNCT__
1696 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1697 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1698 {
1699   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1700   Mat            A    = mat->A,B = mat->B;
1701   PetscErrorCode ierr;
1702   PetscReal      isend[5],irecv[5];
1703 
1704   PetscFunctionBegin;
1705   info->block_size = 1.0;
1706   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1707 
1708   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1709   isend[3] = info->memory;  isend[4] = info->mallocs;
1710 
1711   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1712 
1713   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1714   isend[3] += info->memory;  isend[4] += info->mallocs;
1715   if (flag == MAT_LOCAL) {
1716     info->nz_used      = isend[0];
1717     info->nz_allocated = isend[1];
1718     info->nz_unneeded  = isend[2];
1719     info->memory       = isend[3];
1720     info->mallocs      = isend[4];
1721   } else if (flag == MAT_GLOBAL_MAX) {
1722     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1723 
1724     info->nz_used      = irecv[0];
1725     info->nz_allocated = irecv[1];
1726     info->nz_unneeded  = irecv[2];
1727     info->memory       = irecv[3];
1728     info->mallocs      = irecv[4];
1729   } else if (flag == MAT_GLOBAL_SUM) {
1730     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1731 
1732     info->nz_used      = irecv[0];
1733     info->nz_allocated = irecv[1];
1734     info->nz_unneeded  = irecv[2];
1735     info->memory       = irecv[3];
1736     info->mallocs      = irecv[4];
1737   }
1738   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1739   info->fill_ratio_needed = 0;
1740   info->factor_mallocs    = 0;
1741   PetscFunctionReturn(0);
1742 }
1743 
1744 #undef __FUNCT__
1745 #define __FUNCT__ "MatSetOption_MPIAIJ"
1746 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1747 {
1748   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1749   PetscErrorCode ierr;
1750 
1751   PetscFunctionBegin;
1752   switch (op) {
1753   case MAT_NEW_NONZERO_LOCATIONS:
1754   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1755   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1756   case MAT_KEEP_NONZERO_PATTERN:
1757   case MAT_NEW_NONZERO_LOCATION_ERR:
1758   case MAT_USE_INODES:
1759   case MAT_IGNORE_ZERO_ENTRIES:
1760     MatCheckPreallocated(A,1);
1761     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1762     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1763     break;
1764   case MAT_ROW_ORIENTED:
1765     a->roworiented = flg;
1766 
1767     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1768     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1769     break;
1770   case MAT_NEW_DIAGONALS:
1771     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1772     break;
1773   case MAT_IGNORE_OFF_PROC_ENTRIES:
1774     a->donotstash = flg;
1775     break;
1776   case MAT_SPD:
1777     A->spd_set = PETSC_TRUE;
1778     A->spd     = flg;
1779     if (flg) {
1780       A->symmetric                  = PETSC_TRUE;
1781       A->structurally_symmetric     = PETSC_TRUE;
1782       A->symmetric_set              = PETSC_TRUE;
1783       A->structurally_symmetric_set = PETSC_TRUE;
1784     }
1785     break;
1786   case MAT_SYMMETRIC:
1787     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1788     break;
1789   case MAT_STRUCTURALLY_SYMMETRIC:
1790     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1791     break;
1792   case MAT_HERMITIAN:
1793     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1794     break;
1795   case MAT_SYMMETRY_ETERNAL:
1796     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1797     break;
1798   default:
1799     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1800   }
1801   PetscFunctionReturn(0);
1802 }
1803 
1804 #undef __FUNCT__
1805 #define __FUNCT__ "MatGetRow_MPIAIJ"
1806 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1807 {
1808   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1809   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1810   PetscErrorCode ierr;
1811   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1812   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1813   PetscInt       *cmap,*idx_p;
1814 
1815   PetscFunctionBegin;
1816   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1817   mat->getrowactive = PETSC_TRUE;
1818 
1819   if (!mat->rowvalues && (idx || v)) {
1820     /*
1821         allocate enough space to hold information from the longest row.
1822     */
1823     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1824     PetscInt   max = 1,tmp;
1825     for (i=0; i<matin->rmap->n; i++) {
1826       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1827       if (max < tmp) max = tmp;
1828     }
1829     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1830   }
1831 
1832   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1833   lrow = row - rstart;
1834 
1835   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1836   if (!v)   {pvA = 0; pvB = 0;}
1837   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1838   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1839   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1840   nztot = nzA + nzB;
1841 
1842   cmap = mat->garray;
1843   if (v  || idx) {
1844     if (nztot) {
1845       /* Sort by increasing column numbers, assuming A and B already sorted */
1846       PetscInt imark = -1;
1847       if (v) {
1848         *v = v_p = mat->rowvalues;
1849         for (i=0; i<nzB; i++) {
1850           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1851           else break;
1852         }
1853         imark = i;
1854         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1855         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1856       }
1857       if (idx) {
1858         *idx = idx_p = mat->rowindices;
1859         if (imark > -1) {
1860           for (i=0; i<imark; i++) {
1861             idx_p[i] = cmap[cworkB[i]];
1862           }
1863         } else {
1864           for (i=0; i<nzB; i++) {
1865             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1866             else break;
1867           }
1868           imark = i;
1869         }
1870         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1871         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1872       }
1873     } else {
1874       if (idx) *idx = 0;
1875       if (v)   *v   = 0;
1876     }
1877   }
1878   *nz  = nztot;
1879   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1880   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1881   PetscFunctionReturn(0);
1882 }
1883 
1884 #undef __FUNCT__
1885 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1886 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1887 {
1888   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1889 
1890   PetscFunctionBegin;
1891   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1892   aij->getrowactive = PETSC_FALSE;
1893   PetscFunctionReturn(0);
1894 }
1895 
1896 #undef __FUNCT__
1897 #define __FUNCT__ "MatNorm_MPIAIJ"
1898 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1899 {
1900   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1901   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1902   PetscErrorCode ierr;
1903   PetscInt       i,j,cstart = mat->cmap->rstart;
1904   PetscReal      sum = 0.0;
1905   MatScalar      *v;
1906 
1907   PetscFunctionBegin;
1908   if (aij->size == 1) {
1909     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1910   } else {
1911     if (type == NORM_FROBENIUS) {
1912       v = amat->a;
1913       for (i=0; i<amat->nz; i++) {
1914         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1915       }
1916       v = bmat->a;
1917       for (i=0; i<bmat->nz; i++) {
1918         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1919       }
1920       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1921       *norm = PetscSqrtReal(*norm);
1922     } else if (type == NORM_1) { /* max column norm */
1923       PetscReal *tmp,*tmp2;
1924       PetscInt  *jj,*garray = aij->garray;
1925       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1926       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1927       *norm = 0.0;
1928       v     = amat->a; jj = amat->j;
1929       for (j=0; j<amat->nz; j++) {
1930         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1931       }
1932       v = bmat->a; jj = bmat->j;
1933       for (j=0; j<bmat->nz; j++) {
1934         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1935       }
1936       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1937       for (j=0; j<mat->cmap->N; j++) {
1938         if (tmp2[j] > *norm) *norm = tmp2[j];
1939       }
1940       ierr = PetscFree(tmp);CHKERRQ(ierr);
1941       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1942     } else if (type == NORM_INFINITY) { /* max row norm */
1943       PetscReal ntemp = 0.0;
1944       for (j=0; j<aij->A->rmap->n; j++) {
1945         v   = amat->a + amat->i[j];
1946         sum = 0.0;
1947         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1948           sum += PetscAbsScalar(*v); v++;
1949         }
1950         v = bmat->a + bmat->i[j];
1951         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1952           sum += PetscAbsScalar(*v); v++;
1953         }
1954         if (sum > ntemp) ntemp = sum;
1955       }
1956       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1957     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1958   }
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatTranspose_MPIAIJ"
1964 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1965 {
1966   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1967   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1968   PetscErrorCode ierr;
1969   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1970   PetscInt       cstart = A->cmap->rstart,ncol;
1971   Mat            B;
1972   MatScalar      *array;
1973 
1974   PetscFunctionBegin;
1975   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1976 
1977   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1978   ai = Aloc->i; aj = Aloc->j;
1979   bi = Bloc->i; bj = Bloc->j;
1980   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1981     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1982     PetscSFNode          *oloc;
1983     PETSC_UNUSED PetscSF sf;
1984 
1985     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1986     /* compute d_nnz for preallocation */
1987     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1988     for (i=0; i<ai[ma]; i++) {
1989       d_nnz[aj[i]]++;
1990       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1991     }
1992     /* compute local off-diagonal contributions */
1993     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1994     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1995     /* map those to global */
1996     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1997     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1998     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1999     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2000     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2001     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2002     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2003 
2004     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2005     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2006     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2007     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2008     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2009     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2010   } else {
2011     B    = *matout;
2012     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2013     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2014   }
2015 
2016   /* copy over the A part */
2017   array = Aloc->a;
2018   row   = A->rmap->rstart;
2019   for (i=0; i<ma; i++) {
2020     ncol = ai[i+1]-ai[i];
2021     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2022     row++;
2023     array += ncol; aj += ncol;
2024   }
2025   aj = Aloc->j;
2026   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2027 
2028   /* copy over the B part */
2029   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2030   array = Bloc->a;
2031   row   = A->rmap->rstart;
2032   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2033   cols_tmp = cols;
2034   for (i=0; i<mb; i++) {
2035     ncol = bi[i+1]-bi[i];
2036     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2037     row++;
2038     array += ncol; cols_tmp += ncol;
2039   }
2040   ierr = PetscFree(cols);CHKERRQ(ierr);
2041 
2042   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2043   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2044   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2045     *matout = B;
2046   } else {
2047     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2048   }
2049   PetscFunctionReturn(0);
2050 }
2051 
2052 #undef __FUNCT__
2053 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2054 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2055 {
2056   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2057   Mat            a    = aij->A,b = aij->B;
2058   PetscErrorCode ierr;
2059   PetscInt       s1,s2,s3;
2060 
2061   PetscFunctionBegin;
2062   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2063   if (rr) {
2064     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2065     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2066     /* Overlap communication with computation. */
2067     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2068   }
2069   if (ll) {
2070     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2071     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2072     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2073   }
2074   /* scale  the diagonal block */
2075   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2076 
2077   if (rr) {
2078     /* Do a scatter end and then right scale the off-diagonal block */
2079     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2080     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2081   }
2082   PetscFunctionReturn(0);
2083 }
2084 
2085 #undef __FUNCT__
2086 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2087 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2088 {
2089   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2090   PetscErrorCode ierr;
2091 
2092   PetscFunctionBegin;
2093   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2094   PetscFunctionReturn(0);
2095 }
2096 
2097 #undef __FUNCT__
2098 #define __FUNCT__ "MatEqual_MPIAIJ"
2099 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2100 {
2101   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2102   Mat            a,b,c,d;
2103   PetscBool      flg;
2104   PetscErrorCode ierr;
2105 
2106   PetscFunctionBegin;
2107   a = matA->A; b = matA->B;
2108   c = matB->A; d = matB->B;
2109 
2110   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2111   if (flg) {
2112     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2113   }
2114   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2115   PetscFunctionReturn(0);
2116 }
2117 
2118 #undef __FUNCT__
2119 #define __FUNCT__ "MatCopy_MPIAIJ"
2120 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2121 {
2122   PetscErrorCode ierr;
2123   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2124   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2125 
2126   PetscFunctionBegin;
2127   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2128   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2129     /* because of the column compression in the off-processor part of the matrix a->B,
2130        the number of columns in a->B and b->B may be different, hence we cannot call
2131        the MatCopy() directly on the two parts. If need be, we can provide a more
2132        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2133        then copying the submatrices */
2134     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2135   } else {
2136     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2137     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2138   }
2139   PetscFunctionReturn(0);
2140 }
2141 
2142 #undef __FUNCT__
2143 #define __FUNCT__ "MatSetUp_MPIAIJ"
2144 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2145 {
2146   PetscErrorCode ierr;
2147 
2148   PetscFunctionBegin;
2149   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 /*
2154    Computes the number of nonzeros per row needed for preallocation when X and Y
2155    have different nonzero structure.
2156 */
2157 #undef __FUNCT__
2158 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2159 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2160 {
2161   PetscInt       i,j,k,nzx,nzy;
2162 
2163   PetscFunctionBegin;
2164   /* Set the number of nonzeros in the new matrix */
2165   for (i=0; i<m; i++) {
2166     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2167     nzx = xi[i+1] - xi[i];
2168     nzy = yi[i+1] - yi[i];
2169     nnz[i] = 0;
2170     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2171       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2172       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2173       nnz[i]++;
2174     }
2175     for (; k<nzy; k++) nnz[i]++;
2176   }
2177   PetscFunctionReturn(0);
2178 }
2179 
2180 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2181 #undef __FUNCT__
2182 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2183 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2184 {
2185   PetscErrorCode ierr;
2186   PetscInt       m = Y->rmap->N;
2187   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2188   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2189 
2190   PetscFunctionBegin;
2191   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2192   PetscFunctionReturn(0);
2193 }
2194 
2195 #undef __FUNCT__
2196 #define __FUNCT__ "MatAXPY_MPIAIJ"
2197 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2198 {
2199   PetscErrorCode ierr;
2200   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2201   PetscBLASInt   bnz,one=1;
2202   Mat_SeqAIJ     *x,*y;
2203 
2204   PetscFunctionBegin;
2205   if (str == SAME_NONZERO_PATTERN) {
2206     PetscScalar alpha = a;
2207     x    = (Mat_SeqAIJ*)xx->A->data;
2208     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2209     y    = (Mat_SeqAIJ*)yy->A->data;
2210     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2211     x    = (Mat_SeqAIJ*)xx->B->data;
2212     y    = (Mat_SeqAIJ*)yy->B->data;
2213     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2214     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2215     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2216   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2217     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2218   } else {
2219     Mat      B;
2220     PetscInt *nnz_d,*nnz_o;
2221     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2222     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2223     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2224     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2225     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2226     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2227     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2228     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2229     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2230     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2231     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2232     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2233     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2234     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2235   }
2236   PetscFunctionReturn(0);
2237 }
2238 
2239 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2240 
2241 #undef __FUNCT__
2242 #define __FUNCT__ "MatConjugate_MPIAIJ"
2243 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2244 {
2245 #if defined(PETSC_USE_COMPLEX)
2246   PetscErrorCode ierr;
2247   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2248 
2249   PetscFunctionBegin;
2250   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2251   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2252 #else
2253   PetscFunctionBegin;
2254 #endif
2255   PetscFunctionReturn(0);
2256 }
2257 
2258 #undef __FUNCT__
2259 #define __FUNCT__ "MatRealPart_MPIAIJ"
2260 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2261 {
2262   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2263   PetscErrorCode ierr;
2264 
2265   PetscFunctionBegin;
2266   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2267   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2268   PetscFunctionReturn(0);
2269 }
2270 
2271 #undef __FUNCT__
2272 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2273 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2274 {
2275   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2276   PetscErrorCode ierr;
2277 
2278   PetscFunctionBegin;
2279   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2280   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2281   PetscFunctionReturn(0);
2282 }
2283 
2284 #undef __FUNCT__
2285 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2286 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2287 {
2288   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2289   PetscErrorCode ierr;
2290   PetscInt       i,*idxb = 0;
2291   PetscScalar    *va,*vb;
2292   Vec            vtmp;
2293 
2294   PetscFunctionBegin;
2295   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2296   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2297   if (idx) {
2298     for (i=0; i<A->rmap->n; i++) {
2299       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2300     }
2301   }
2302 
2303   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2304   if (idx) {
2305     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2306   }
2307   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2308   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2309 
2310   for (i=0; i<A->rmap->n; i++) {
2311     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2312       va[i] = vb[i];
2313       if (idx) idx[i] = a->garray[idxb[i]];
2314     }
2315   }
2316 
2317   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2318   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2319   ierr = PetscFree(idxb);CHKERRQ(ierr);
2320   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2321   PetscFunctionReturn(0);
2322 }
2323 
2324 #undef __FUNCT__
2325 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2326 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2327 {
2328   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2329   PetscErrorCode ierr;
2330   PetscInt       i,*idxb = 0;
2331   PetscScalar    *va,*vb;
2332   Vec            vtmp;
2333 
2334   PetscFunctionBegin;
2335   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2336   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2337   if (idx) {
2338     for (i=0; i<A->cmap->n; i++) {
2339       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2340     }
2341   }
2342 
2343   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2344   if (idx) {
2345     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2346   }
2347   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2348   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2349 
2350   for (i=0; i<A->rmap->n; i++) {
2351     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2352       va[i] = vb[i];
2353       if (idx) idx[i] = a->garray[idxb[i]];
2354     }
2355   }
2356 
2357   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2358   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2359   ierr = PetscFree(idxb);CHKERRQ(ierr);
2360   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2361   PetscFunctionReturn(0);
2362 }
2363 
2364 #undef __FUNCT__
2365 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2366 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2367 {
2368   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2369   PetscInt       n      = A->rmap->n;
2370   PetscInt       cstart = A->cmap->rstart;
2371   PetscInt       *cmap  = mat->garray;
2372   PetscInt       *diagIdx, *offdiagIdx;
2373   Vec            diagV, offdiagV;
2374   PetscScalar    *a, *diagA, *offdiagA;
2375   PetscInt       r;
2376   PetscErrorCode ierr;
2377 
2378   PetscFunctionBegin;
2379   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2380   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2381   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2382   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2383   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2384   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2385   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2386   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2387   for (r = 0; r < n; ++r) {
2388     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2389       a[r]   = diagA[r];
2390       idx[r] = cstart + diagIdx[r];
2391     } else {
2392       a[r]   = offdiagA[r];
2393       idx[r] = cmap[offdiagIdx[r]];
2394     }
2395   }
2396   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2397   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2398   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2399   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2400   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2401   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2402   PetscFunctionReturn(0);
2403 }
2404 
2405 #undef __FUNCT__
2406 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2407 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2408 {
2409   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2410   PetscInt       n      = A->rmap->n;
2411   PetscInt       cstart = A->cmap->rstart;
2412   PetscInt       *cmap  = mat->garray;
2413   PetscInt       *diagIdx, *offdiagIdx;
2414   Vec            diagV, offdiagV;
2415   PetscScalar    *a, *diagA, *offdiagA;
2416   PetscInt       r;
2417   PetscErrorCode ierr;
2418 
2419   PetscFunctionBegin;
2420   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2421   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2422   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2423   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2424   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2425   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2426   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2427   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2428   for (r = 0; r < n; ++r) {
2429     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2430       a[r]   = diagA[r];
2431       idx[r] = cstart + diagIdx[r];
2432     } else {
2433       a[r]   = offdiagA[r];
2434       idx[r] = cmap[offdiagIdx[r]];
2435     }
2436   }
2437   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2438   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2439   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2440   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2441   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2442   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2443   PetscFunctionReturn(0);
2444 }
2445 
2446 #undef __FUNCT__
2447 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2448 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2449 {
2450   PetscErrorCode ierr;
2451   Mat            *dummy;
2452 
2453   PetscFunctionBegin;
2454   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2455   *newmat = *dummy;
2456   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2457   PetscFunctionReturn(0);
2458 }
2459 
2460 #undef __FUNCT__
2461 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2462 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2463 {
2464   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2465   PetscErrorCode ierr;
2466 
2467   PetscFunctionBegin;
2468   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2469   PetscFunctionReturn(0);
2470 }
2471 
2472 #undef __FUNCT__
2473 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2474 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2475 {
2476   PetscErrorCode ierr;
2477   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2478 
2479   PetscFunctionBegin;
2480   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2481   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2482   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2483   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2484   PetscFunctionReturn(0);
2485 }
2486 
2487 #undef __FUNCT__
2488 #define __FUNCT__ "MatShift_MPIAIJ"
2489 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2490 {
2491   PetscErrorCode ierr;
2492   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2493   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2494 
2495   PetscFunctionBegin;
2496   if (!Y->preallocated) {
2497     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2498   } else if (!aij->nz) {
2499     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2500   }
2501   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2502   PetscFunctionReturn(0);
2503 }
2504 
2505 /* -------------------------------------------------------------------*/
2506 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2507                                        MatGetRow_MPIAIJ,
2508                                        MatRestoreRow_MPIAIJ,
2509                                        MatMult_MPIAIJ,
2510                                 /* 4*/ MatMultAdd_MPIAIJ,
2511                                        MatMultTranspose_MPIAIJ,
2512                                        MatMultTransposeAdd_MPIAIJ,
2513                                        0,
2514                                        0,
2515                                        0,
2516                                 /*10*/ 0,
2517                                        0,
2518                                        0,
2519                                        MatSOR_MPIAIJ,
2520                                        MatTranspose_MPIAIJ,
2521                                 /*15*/ MatGetInfo_MPIAIJ,
2522                                        MatEqual_MPIAIJ,
2523                                        MatGetDiagonal_MPIAIJ,
2524                                        MatDiagonalScale_MPIAIJ,
2525                                        MatNorm_MPIAIJ,
2526                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2527                                        MatAssemblyEnd_MPIAIJ,
2528                                        MatSetOption_MPIAIJ,
2529                                        MatZeroEntries_MPIAIJ,
2530                                 /*24*/ MatZeroRows_MPIAIJ,
2531                                        0,
2532                                        0,
2533                                        0,
2534                                        0,
2535                                 /*29*/ MatSetUp_MPIAIJ,
2536                                        0,
2537                                        0,
2538                                        0,
2539                                        0,
2540                                 /*34*/ MatDuplicate_MPIAIJ,
2541                                        0,
2542                                        0,
2543                                        0,
2544                                        0,
2545                                 /*39*/ MatAXPY_MPIAIJ,
2546                                        MatGetSubMatrices_MPIAIJ,
2547                                        MatIncreaseOverlap_MPIAIJ,
2548                                        MatGetValues_MPIAIJ,
2549                                        MatCopy_MPIAIJ,
2550                                 /*44*/ MatGetRowMax_MPIAIJ,
2551                                        MatScale_MPIAIJ,
2552                                        MatShift_MPIAIJ,
2553                                        MatDiagonalSet_MPIAIJ,
2554                                        MatZeroRowsColumns_MPIAIJ,
2555                                 /*49*/ MatSetRandom_MPIAIJ,
2556                                        0,
2557                                        0,
2558                                        0,
2559                                        0,
2560                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2561                                        0,
2562                                        MatSetUnfactored_MPIAIJ,
2563                                        MatPermute_MPIAIJ,
2564                                        0,
2565                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2566                                        MatDestroy_MPIAIJ,
2567                                        MatView_MPIAIJ,
2568                                        0,
2569                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2570                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2571                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2572                                        0,
2573                                        0,
2574                                        0,
2575                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2576                                        MatGetRowMinAbs_MPIAIJ,
2577                                        0,
2578                                        MatSetColoring_MPIAIJ,
2579                                        0,
2580                                        MatSetValuesAdifor_MPIAIJ,
2581                                 /*75*/ MatFDColoringApply_AIJ,
2582                                        0,
2583                                        0,
2584                                        0,
2585                                        MatFindZeroDiagonals_MPIAIJ,
2586                                 /*80*/ 0,
2587                                        0,
2588                                        0,
2589                                 /*83*/ MatLoad_MPIAIJ,
2590                                        0,
2591                                        0,
2592                                        0,
2593                                        0,
2594                                        0,
2595                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2596                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2597                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2598                                        MatPtAP_MPIAIJ_MPIAIJ,
2599                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2600                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2601                                        0,
2602                                        0,
2603                                        0,
2604                                        0,
2605                                 /*99*/ 0,
2606                                        0,
2607                                        0,
2608                                        MatConjugate_MPIAIJ,
2609                                        0,
2610                                 /*104*/MatSetValuesRow_MPIAIJ,
2611                                        MatRealPart_MPIAIJ,
2612                                        MatImaginaryPart_MPIAIJ,
2613                                        0,
2614                                        0,
2615                                 /*109*/0,
2616                                        0,
2617                                        MatGetRowMin_MPIAIJ,
2618                                        0,
2619                                        0,
2620                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2621                                        0,
2622                                        MatGetGhosts_MPIAIJ,
2623                                        0,
2624                                        0,
2625                                 /*119*/0,
2626                                        0,
2627                                        0,
2628                                        0,
2629                                        MatGetMultiProcBlock_MPIAIJ,
2630                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2631                                        MatGetColumnNorms_MPIAIJ,
2632                                        MatInvertBlockDiagonal_MPIAIJ,
2633                                        0,
2634                                        MatGetSubMatricesMPI_MPIAIJ,
2635                                 /*129*/0,
2636                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2637                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2638                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2639                                        0,
2640                                 /*134*/0,
2641                                        0,
2642                                        0,
2643                                        0,
2644                                        0,
2645                                 /*139*/0,
2646                                        0,
2647                                        0,
2648                                        MatFDColoringSetUp_MPIXAIJ,
2649                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2650                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2651 };
2652 
2653 /* ----------------------------------------------------------------------------------------*/
2654 
2655 #undef __FUNCT__
2656 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2657 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2658 {
2659   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2660   PetscErrorCode ierr;
2661 
2662   PetscFunctionBegin;
2663   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2664   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2665   PetscFunctionReturn(0);
2666 }
2667 
2668 #undef __FUNCT__
2669 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2670 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2671 {
2672   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2673   PetscErrorCode ierr;
2674 
2675   PetscFunctionBegin;
2676   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2677   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2678   PetscFunctionReturn(0);
2679 }
2680 
2681 #undef __FUNCT__
2682 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2683 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2684 {
2685   Mat_MPIAIJ     *b;
2686   PetscErrorCode ierr;
2687 
2688   PetscFunctionBegin;
2689   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2690   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2691   b = (Mat_MPIAIJ*)B->data;
2692 
2693   if (!B->preallocated) {
2694     /* Explicitly create 2 MATSEQAIJ matrices. */
2695     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2696     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2697     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2698     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2699     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2700     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2701     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2702     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2703     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2704     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2705   }
2706 
2707   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2708   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2709   B->preallocated = PETSC_TRUE;
2710   PetscFunctionReturn(0);
2711 }
2712 
2713 #undef __FUNCT__
2714 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2715 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2716 {
2717   Mat            mat;
2718   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2719   PetscErrorCode ierr;
2720 
2721   PetscFunctionBegin;
2722   *newmat = 0;
2723   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2724   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2725   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2726   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2727   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2728   a       = (Mat_MPIAIJ*)mat->data;
2729 
2730   mat->factortype   = matin->factortype;
2731   mat->assembled    = PETSC_TRUE;
2732   mat->insertmode   = NOT_SET_VALUES;
2733   mat->preallocated = PETSC_TRUE;
2734 
2735   a->size         = oldmat->size;
2736   a->rank         = oldmat->rank;
2737   a->donotstash   = oldmat->donotstash;
2738   a->roworiented  = oldmat->roworiented;
2739   a->rowindices   = 0;
2740   a->rowvalues    = 0;
2741   a->getrowactive = PETSC_FALSE;
2742 
2743   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2744   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2745 
2746   if (oldmat->colmap) {
2747 #if defined(PETSC_USE_CTABLE)
2748     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2749 #else
2750     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2751     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2752     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2753 #endif
2754   } else a->colmap = 0;
2755   if (oldmat->garray) {
2756     PetscInt len;
2757     len  = oldmat->B->cmap->n;
2758     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2759     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2760     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2761   } else a->garray = 0;
2762 
2763   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2764   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2765   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2766   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2767   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2768   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2769   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2770   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2771   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2772   *newmat = mat;
2773   PetscFunctionReturn(0);
2774 }
2775 
2776 
2777 
2778 #undef __FUNCT__
2779 #define __FUNCT__ "MatLoad_MPIAIJ"
2780 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2781 {
2782   PetscScalar    *vals,*svals;
2783   MPI_Comm       comm;
2784   PetscErrorCode ierr;
2785   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2786   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2787   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2788   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2789   PetscInt       cend,cstart,n,*rowners;
2790   int            fd;
2791   PetscInt       bs = newMat->rmap->bs;
2792 
2793   PetscFunctionBegin;
2794   /* force binary viewer to load .info file if it has not yet done so */
2795   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2796   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2797   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2798   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2799   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2800   if (!rank) {
2801     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2802     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2803   }
2804 
2805   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
2806   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2807   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2808   if (bs < 0) bs = 1;
2809 
2810   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2811   M    = header[1]; N = header[2];
2812 
2813   /* If global sizes are set, check if they are consistent with that given in the file */
2814   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2815   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2816 
2817   /* determine ownership of all (block) rows */
2818   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2819   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2820   else m = newMat->rmap->n; /* Set by user */
2821 
2822   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2823   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2824 
2825   /* First process needs enough room for process with most rows */
2826   if (!rank) {
2827     mmax = rowners[1];
2828     for (i=2; i<=size; i++) {
2829       mmax = PetscMax(mmax, rowners[i]);
2830     }
2831   } else mmax = -1;             /* unused, but compilers complain */
2832 
2833   rowners[0] = 0;
2834   for (i=2; i<=size; i++) {
2835     rowners[i] += rowners[i-1];
2836   }
2837   rstart = rowners[rank];
2838   rend   = rowners[rank+1];
2839 
2840   /* distribute row lengths to all processors */
2841   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2842   if (!rank) {
2843     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2844     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2845     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2846     for (j=0; j<m; j++) {
2847       procsnz[0] += ourlens[j];
2848     }
2849     for (i=1; i<size; i++) {
2850       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2851       /* calculate the number of nonzeros on each processor */
2852       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2853         procsnz[i] += rowlengths[j];
2854       }
2855       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2856     }
2857     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2858   } else {
2859     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2860   }
2861 
2862   if (!rank) {
2863     /* determine max buffer needed and allocate it */
2864     maxnz = 0;
2865     for (i=0; i<size; i++) {
2866       maxnz = PetscMax(maxnz,procsnz[i]);
2867     }
2868     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2869 
2870     /* read in my part of the matrix column indices  */
2871     nz   = procsnz[0];
2872     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2873     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2874 
2875     /* read in every one elses and ship off */
2876     for (i=1; i<size; i++) {
2877       nz   = procsnz[i];
2878       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2879       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2880     }
2881     ierr = PetscFree(cols);CHKERRQ(ierr);
2882   } else {
2883     /* determine buffer space needed for message */
2884     nz = 0;
2885     for (i=0; i<m; i++) {
2886       nz += ourlens[i];
2887     }
2888     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2889 
2890     /* receive message of column indices*/
2891     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2892   }
2893 
2894   /* determine column ownership if matrix is not square */
2895   if (N != M) {
2896     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2897     else n = newMat->cmap->n;
2898     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2899     cstart = cend - n;
2900   } else {
2901     cstart = rstart;
2902     cend   = rend;
2903     n      = cend - cstart;
2904   }
2905 
2906   /* loop over local rows, determining number of off diagonal entries */
2907   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2908   jj   = 0;
2909   for (i=0; i<m; i++) {
2910     for (j=0; j<ourlens[i]; j++) {
2911       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2912       jj++;
2913     }
2914   }
2915 
2916   for (i=0; i<m; i++) {
2917     ourlens[i] -= offlens[i];
2918   }
2919   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2920 
2921   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2922 
2923   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2924 
2925   for (i=0; i<m; i++) {
2926     ourlens[i] += offlens[i];
2927   }
2928 
2929   if (!rank) {
2930     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2931 
2932     /* read in my part of the matrix numerical values  */
2933     nz   = procsnz[0];
2934     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2935 
2936     /* insert into matrix */
2937     jj      = rstart;
2938     smycols = mycols;
2939     svals   = vals;
2940     for (i=0; i<m; i++) {
2941       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2942       smycols += ourlens[i];
2943       svals   += ourlens[i];
2944       jj++;
2945     }
2946 
2947     /* read in other processors and ship out */
2948     for (i=1; i<size; i++) {
2949       nz   = procsnz[i];
2950       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2951       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2952     }
2953     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2954   } else {
2955     /* receive numeric values */
2956     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2957 
2958     /* receive message of values*/
2959     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2960 
2961     /* insert into matrix */
2962     jj      = rstart;
2963     smycols = mycols;
2964     svals   = vals;
2965     for (i=0; i<m; i++) {
2966       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2967       smycols += ourlens[i];
2968       svals   += ourlens[i];
2969       jj++;
2970     }
2971   }
2972   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2973   ierr = PetscFree(vals);CHKERRQ(ierr);
2974   ierr = PetscFree(mycols);CHKERRQ(ierr);
2975   ierr = PetscFree(rowners);CHKERRQ(ierr);
2976   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2977   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2978   PetscFunctionReturn(0);
2979 }
2980 
2981 #undef __FUNCT__
2982 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2983 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */
2984 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2985 {
2986   PetscErrorCode ierr;
2987   IS             iscol_local;
2988   PetscInt       csize;
2989 
2990   PetscFunctionBegin;
2991   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2992   if (call == MAT_REUSE_MATRIX) {
2993     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2994     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2995   } else {
2996     /* check if we are grabbing all columns*/
2997     PetscBool    isstride;
2998     PetscMPIInt  lisstride = 0,gisstride;
2999     ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3000     if (isstride) {
3001       PetscInt  start,len,mstart,mlen;
3002       ierr = ISStrideGetInfo(iscol,&start,&len);CHKERRQ(ierr);
3003       ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3004       if (mstart == start && mlen == len) lisstride = 1;
3005     }
3006     ierr = MPI_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3007     if (gisstride) {
3008       PetscInt N;
3009       ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3010       ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3011       ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3012       ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3013     } else {
3014       PetscInt cbs;
3015       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3016       ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3017       ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3018     }
3019   }
3020   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3021   if (call == MAT_INITIAL_MATRIX) {
3022     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3023     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3024   }
3025   PetscFunctionReturn(0);
3026 }
3027 
3028 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3029 #undef __FUNCT__
3030 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3031 /*
3032     Not great since it makes two copies of the submatrix, first an SeqAIJ
3033   in local and then by concatenating the local matrices the end result.
3034   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3035 
3036   Note: This requires a sequential iscol with all indices.
3037 */
3038 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3039 {
3040   PetscErrorCode ierr;
3041   PetscMPIInt    rank,size;
3042   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3043   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3044   PetscBool      allcolumns, colflag;
3045   Mat            M,Mreuse;
3046   MatScalar      *vwork,*aa;
3047   MPI_Comm       comm;
3048   Mat_SeqAIJ     *aij;
3049 
3050   PetscFunctionBegin;
3051   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3052   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3053   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3054 
3055   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3056   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3057   if (colflag && ncol == mat->cmap->N) {
3058     allcolumns = PETSC_TRUE;
3059     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr);
3060   } else {
3061     allcolumns = PETSC_FALSE;
3062   }
3063   if (call ==  MAT_REUSE_MATRIX) {
3064     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3065     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3066     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3067   } else {
3068     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3069   }
3070 
3071   /*
3072       m - number of local rows
3073       n - number of columns (same on all processors)
3074       rstart - first row in new global matrix generated
3075   */
3076   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3077   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3078   if (call == MAT_INITIAL_MATRIX) {
3079     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3080     ii  = aij->i;
3081     jj  = aij->j;
3082 
3083     /*
3084         Determine the number of non-zeros in the diagonal and off-diagonal
3085         portions of the matrix in order to do correct preallocation
3086     */
3087 
3088     /* first get start and end of "diagonal" columns */
3089     if (csize == PETSC_DECIDE) {
3090       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3091       if (mglobal == n) { /* square matrix */
3092         nlocal = m;
3093       } else {
3094         nlocal = n/size + ((n % size) > rank);
3095       }
3096     } else {
3097       nlocal = csize;
3098     }
3099     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3100     rstart = rend - nlocal;
3101     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3102 
3103     /* next, compute all the lengths */
3104     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3105     olens = dlens + m;
3106     for (i=0; i<m; i++) {
3107       jend = ii[i+1] - ii[i];
3108       olen = 0;
3109       dlen = 0;
3110       for (j=0; j<jend; j++) {
3111         if (*jj < rstart || *jj >= rend) olen++;
3112         else dlen++;
3113         jj++;
3114       }
3115       olens[i] = olen;
3116       dlens[i] = dlen;
3117     }
3118     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3119     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3120     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3121     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3122     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3123     ierr = PetscFree(dlens);CHKERRQ(ierr);
3124   } else {
3125     PetscInt ml,nl;
3126 
3127     M    = *newmat;
3128     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3129     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3130     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3131     /*
3132          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3133        rather than the slower MatSetValues().
3134     */
3135     M->was_assembled = PETSC_TRUE;
3136     M->assembled     = PETSC_FALSE;
3137   }
3138   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3139   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3140   ii   = aij->i;
3141   jj   = aij->j;
3142   aa   = aij->a;
3143   for (i=0; i<m; i++) {
3144     row   = rstart + i;
3145     nz    = ii[i+1] - ii[i];
3146     cwork = jj;     jj += nz;
3147     vwork = aa;     aa += nz;
3148     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3149   }
3150 
3151   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3152   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3153   *newmat = M;
3154 
3155   /* save submatrix used in processor for next request */
3156   if (call ==  MAT_INITIAL_MATRIX) {
3157     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3158     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3159   }
3160   PetscFunctionReturn(0);
3161 }
3162 
3163 #undef __FUNCT__
3164 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3165 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3166 {
3167   PetscInt       m,cstart, cend,j,nnz,i,d;
3168   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3169   const PetscInt *JJ;
3170   PetscScalar    *values;
3171   PetscErrorCode ierr;
3172 
3173   PetscFunctionBegin;
3174   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3175 
3176   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3177   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3178   m      = B->rmap->n;
3179   cstart = B->cmap->rstart;
3180   cend   = B->cmap->rend;
3181   rstart = B->rmap->rstart;
3182 
3183   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3184 
3185 #if defined(PETSC_USE_DEBUGGING)
3186   for (i=0; i<m; i++) {
3187     nnz = Ii[i+1]- Ii[i];
3188     JJ  = J + Ii[i];
3189     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3190     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3191     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3192   }
3193 #endif
3194 
3195   for (i=0; i<m; i++) {
3196     nnz     = Ii[i+1]- Ii[i];
3197     JJ      = J + Ii[i];
3198     nnz_max = PetscMax(nnz_max,nnz);
3199     d       = 0;
3200     for (j=0; j<nnz; j++) {
3201       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3202     }
3203     d_nnz[i] = d;
3204     o_nnz[i] = nnz - d;
3205   }
3206   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3207   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3208 
3209   if (v) values = (PetscScalar*)v;
3210   else {
3211     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3212   }
3213 
3214   for (i=0; i<m; i++) {
3215     ii   = i + rstart;
3216     nnz  = Ii[i+1]- Ii[i];
3217     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3218   }
3219   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3220   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3221 
3222   if (!v) {
3223     ierr = PetscFree(values);CHKERRQ(ierr);
3224   }
3225   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3226   PetscFunctionReturn(0);
3227 }
3228 
3229 #undef __FUNCT__
3230 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3231 /*@
3232    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3233    (the default parallel PETSc format).
3234 
3235    Collective on MPI_Comm
3236 
3237    Input Parameters:
3238 +  B - the matrix
3239 .  i - the indices into j for the start of each local row (starts with zero)
3240 .  j - the column indices for each local row (starts with zero)
3241 -  v - optional values in the matrix
3242 
3243    Level: developer
3244 
3245    Notes:
3246        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3247      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3248      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3249 
3250        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3251 
3252        The format which is used for the sparse matrix input, is equivalent to a
3253     row-major ordering.. i.e for the following matrix, the input data expected is
3254     as shown
3255 
3256 $        1 0 0
3257 $        2 0 3     P0
3258 $       -------
3259 $        4 5 6     P1
3260 $
3261 $     Process0 [P0]: rows_owned=[0,1]
3262 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3263 $        j =  {0,0,2}  [size = 3]
3264 $        v =  {1,2,3}  [size = 3]
3265 $
3266 $     Process1 [P1]: rows_owned=[2]
3267 $        i =  {0,3}    [size = nrow+1  = 1+1]
3268 $        j =  {0,1,2}  [size = 3]
3269 $        v =  {4,5,6}  [size = 3]
3270 
3271 .keywords: matrix, aij, compressed row, sparse, parallel
3272 
3273 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3274           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3275 @*/
3276 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3277 {
3278   PetscErrorCode ierr;
3279 
3280   PetscFunctionBegin;
3281   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3282   PetscFunctionReturn(0);
3283 }
3284 
3285 #undef __FUNCT__
3286 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3287 /*@C
3288    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3289    (the default parallel PETSc format).  For good matrix assembly performance
3290    the user should preallocate the matrix storage by setting the parameters
3291    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3292    performance can be increased by more than a factor of 50.
3293 
3294    Collective on MPI_Comm
3295 
3296    Input Parameters:
3297 +  B - the matrix
3298 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3299            (same value is used for all local rows)
3300 .  d_nnz - array containing the number of nonzeros in the various rows of the
3301            DIAGONAL portion of the local submatrix (possibly different for each row)
3302            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3303            The size of this array is equal to the number of local rows, i.e 'm'.
3304            For matrices that will be factored, you must leave room for (and set)
3305            the diagonal entry even if it is zero.
3306 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3307            submatrix (same value is used for all local rows).
3308 -  o_nnz - array containing the number of nonzeros in the various rows of the
3309            OFF-DIAGONAL portion of the local submatrix (possibly different for
3310            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3311            structure. The size of this array is equal to the number
3312            of local rows, i.e 'm'.
3313 
3314    If the *_nnz parameter is given then the *_nz parameter is ignored
3315 
3316    The AIJ format (also called the Yale sparse matrix format or
3317    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3318    storage.  The stored row and column indices begin with zero.
3319    See Users-Manual: ch_mat for details.
3320 
3321    The parallel matrix is partitioned such that the first m0 rows belong to
3322    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3323    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3324 
3325    The DIAGONAL portion of the local submatrix of a processor can be defined
3326    as the submatrix which is obtained by extraction the part corresponding to
3327    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3328    first row that belongs to the processor, r2 is the last row belonging to
3329    the this processor, and c1-c2 is range of indices of the local part of a
3330    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3331    common case of a square matrix, the row and column ranges are the same and
3332    the DIAGONAL part is also square. The remaining portion of the local
3333    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3334 
3335    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3336 
3337    You can call MatGetInfo() to get information on how effective the preallocation was;
3338    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3339    You can also run with the option -info and look for messages with the string
3340    malloc in them to see if additional memory allocation was needed.
3341 
3342    Example usage:
3343 
3344    Consider the following 8x8 matrix with 34 non-zero values, that is
3345    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3346    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3347    as follows:
3348 
3349 .vb
3350             1  2  0  |  0  3  0  |  0  4
3351     Proc0   0  5  6  |  7  0  0  |  8  0
3352             9  0 10  | 11  0  0  | 12  0
3353     -------------------------------------
3354            13  0 14  | 15 16 17  |  0  0
3355     Proc1   0 18  0  | 19 20 21  |  0  0
3356             0  0  0  | 22 23  0  | 24  0
3357     -------------------------------------
3358     Proc2  25 26 27  |  0  0 28  | 29  0
3359            30  0  0  | 31 32 33  |  0 34
3360 .ve
3361 
3362    This can be represented as a collection of submatrices as:
3363 
3364 .vb
3365       A B C
3366       D E F
3367       G H I
3368 .ve
3369 
3370    Where the submatrices A,B,C are owned by proc0, D,E,F are
3371    owned by proc1, G,H,I are owned by proc2.
3372 
3373    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3374    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3375    The 'M','N' parameters are 8,8, and have the same values on all procs.
3376 
3377    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3378    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3379    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3380    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3381    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3382    matrix, ans [DF] as another SeqAIJ matrix.
3383 
3384    When d_nz, o_nz parameters are specified, d_nz storage elements are
3385    allocated for every row of the local diagonal submatrix, and o_nz
3386    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3387    One way to choose d_nz and o_nz is to use the max nonzerors per local
3388    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3389    In this case, the values of d_nz,o_nz are:
3390 .vb
3391      proc0 : dnz = 2, o_nz = 2
3392      proc1 : dnz = 3, o_nz = 2
3393      proc2 : dnz = 1, o_nz = 4
3394 .ve
3395    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3396    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3397    for proc3. i.e we are using 12+15+10=37 storage locations to store
3398    34 values.
3399 
3400    When d_nnz, o_nnz parameters are specified, the storage is specified
3401    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3402    In the above case the values for d_nnz,o_nnz are:
3403 .vb
3404      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3405      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3406      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3407 .ve
3408    Here the space allocated is sum of all the above values i.e 34, and
3409    hence pre-allocation is perfect.
3410 
3411    Level: intermediate
3412 
3413 .keywords: matrix, aij, compressed row, sparse, parallel
3414 
3415 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3416           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3417 @*/
3418 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3419 {
3420   PetscErrorCode ierr;
3421 
3422   PetscFunctionBegin;
3423   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3424   PetscValidType(B,1);
3425   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3426   PetscFunctionReturn(0);
3427 }
3428 
3429 #undef __FUNCT__
3430 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3431 /*@
3432      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3433          CSR format the local rows.
3434 
3435    Collective on MPI_Comm
3436 
3437    Input Parameters:
3438 +  comm - MPI communicator
3439 .  m - number of local rows (Cannot be PETSC_DECIDE)
3440 .  n - This value should be the same as the local size used in creating the
3441        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3442        calculated if N is given) For square matrices n is almost always m.
3443 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3444 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3445 .   i - row indices
3446 .   j - column indices
3447 -   a - matrix values
3448 
3449    Output Parameter:
3450 .   mat - the matrix
3451 
3452    Level: intermediate
3453 
3454    Notes:
3455        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3456      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3457      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3458 
3459        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3460 
3461        The format which is used for the sparse matrix input, is equivalent to a
3462     row-major ordering.. i.e for the following matrix, the input data expected is
3463     as shown
3464 
3465 $        1 0 0
3466 $        2 0 3     P0
3467 $       -------
3468 $        4 5 6     P1
3469 $
3470 $     Process0 [P0]: rows_owned=[0,1]
3471 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3472 $        j =  {0,0,2}  [size = 3]
3473 $        v =  {1,2,3}  [size = 3]
3474 $
3475 $     Process1 [P1]: rows_owned=[2]
3476 $        i =  {0,3}    [size = nrow+1  = 1+1]
3477 $        j =  {0,1,2}  [size = 3]
3478 $        v =  {4,5,6}  [size = 3]
3479 
3480 .keywords: matrix, aij, compressed row, sparse, parallel
3481 
3482 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3483           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3484 @*/
3485 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3486 {
3487   PetscErrorCode ierr;
3488 
3489   PetscFunctionBegin;
3490   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3491   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3492   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3493   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3494   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3495   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3496   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3497   PetscFunctionReturn(0);
3498 }
3499 
3500 #undef __FUNCT__
3501 #define __FUNCT__ "MatCreateAIJ"
3502 /*@C
3503    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3504    (the default parallel PETSc format).  For good matrix assembly performance
3505    the user should preallocate the matrix storage by setting the parameters
3506    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3507    performance can be increased by more than a factor of 50.
3508 
3509    Collective on MPI_Comm
3510 
3511    Input Parameters:
3512 +  comm - MPI communicator
3513 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3514            This value should be the same as the local size used in creating the
3515            y vector for the matrix-vector product y = Ax.
3516 .  n - This value should be the same as the local size used in creating the
3517        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3518        calculated if N is given) For square matrices n is almost always m.
3519 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3520 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3521 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3522            (same value is used for all local rows)
3523 .  d_nnz - array containing the number of nonzeros in the various rows of the
3524            DIAGONAL portion of the local submatrix (possibly different for each row)
3525            or NULL, if d_nz is used to specify the nonzero structure.
3526            The size of this array is equal to the number of local rows, i.e 'm'.
3527 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3528            submatrix (same value is used for all local rows).
3529 -  o_nnz - array containing the number of nonzeros in the various rows of the
3530            OFF-DIAGONAL portion of the local submatrix (possibly different for
3531            each row) or NULL, if o_nz is used to specify the nonzero
3532            structure. The size of this array is equal to the number
3533            of local rows, i.e 'm'.
3534 
3535    Output Parameter:
3536 .  A - the matrix
3537 
3538    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3539    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3540    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3541 
3542    Notes:
3543    If the *_nnz parameter is given then the *_nz parameter is ignored
3544 
3545    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3546    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3547    storage requirements for this matrix.
3548 
3549    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3550    processor than it must be used on all processors that share the object for
3551    that argument.
3552 
3553    The user MUST specify either the local or global matrix dimensions
3554    (possibly both).
3555 
3556    The parallel matrix is partitioned across processors such that the
3557    first m0 rows belong to process 0, the next m1 rows belong to
3558    process 1, the next m2 rows belong to process 2 etc.. where
3559    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3560    values corresponding to [m x N] submatrix.
3561 
3562    The columns are logically partitioned with the n0 columns belonging
3563    to 0th partition, the next n1 columns belonging to the next
3564    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3565 
3566    The DIAGONAL portion of the local submatrix on any given processor
3567    is the submatrix corresponding to the rows and columns m,n
3568    corresponding to the given processor. i.e diagonal matrix on
3569    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3570    etc. The remaining portion of the local submatrix [m x (N-n)]
3571    constitute the OFF-DIAGONAL portion. The example below better
3572    illustrates this concept.
3573 
3574    For a square global matrix we define each processor's diagonal portion
3575    to be its local rows and the corresponding columns (a square submatrix);
3576    each processor's off-diagonal portion encompasses the remainder of the
3577    local matrix (a rectangular submatrix).
3578 
3579    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3580 
3581    When calling this routine with a single process communicator, a matrix of
3582    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3583    type of communicator, use the construction mechanism:
3584      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3585 
3586    By default, this format uses inodes (identical nodes) when possible.
3587    We search for consecutive rows with the same nonzero structure, thereby
3588    reusing matrix information to achieve increased efficiency.
3589 
3590    Options Database Keys:
3591 +  -mat_no_inode  - Do not use inodes
3592 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3593 -  -mat_aij_oneindex - Internally use indexing starting at 1
3594         rather than 0.  Note that when calling MatSetValues(),
3595         the user still MUST index entries starting at 0!
3596 
3597 
3598    Example usage:
3599 
3600    Consider the following 8x8 matrix with 34 non-zero values, that is
3601    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3602    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3603    as follows:
3604 
3605 .vb
3606             1  2  0  |  0  3  0  |  0  4
3607     Proc0   0  5  6  |  7  0  0  |  8  0
3608             9  0 10  | 11  0  0  | 12  0
3609     -------------------------------------
3610            13  0 14  | 15 16 17  |  0  0
3611     Proc1   0 18  0  | 19 20 21  |  0  0
3612             0  0  0  | 22 23  0  | 24  0
3613     -------------------------------------
3614     Proc2  25 26 27  |  0  0 28  | 29  0
3615            30  0  0  | 31 32 33  |  0 34
3616 .ve
3617 
3618    This can be represented as a collection of submatrices as:
3619 
3620 .vb
3621       A B C
3622       D E F
3623       G H I
3624 .ve
3625 
3626    Where the submatrices A,B,C are owned by proc0, D,E,F are
3627    owned by proc1, G,H,I are owned by proc2.
3628 
3629    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3630    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3631    The 'M','N' parameters are 8,8, and have the same values on all procs.
3632 
3633    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3634    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3635    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3636    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3637    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3638    matrix, ans [DF] as another SeqAIJ matrix.
3639 
3640    When d_nz, o_nz parameters are specified, d_nz storage elements are
3641    allocated for every row of the local diagonal submatrix, and o_nz
3642    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3643    One way to choose d_nz and o_nz is to use the max nonzerors per local
3644    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3645    In this case, the values of d_nz,o_nz are:
3646 .vb
3647      proc0 : dnz = 2, o_nz = 2
3648      proc1 : dnz = 3, o_nz = 2
3649      proc2 : dnz = 1, o_nz = 4
3650 .ve
3651    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3652    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3653    for proc3. i.e we are using 12+15+10=37 storage locations to store
3654    34 values.
3655 
3656    When d_nnz, o_nnz parameters are specified, the storage is specified
3657    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3658    In the above case the values for d_nnz,o_nnz are:
3659 .vb
3660      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3661      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3662      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3663 .ve
3664    Here the space allocated is sum of all the above values i.e 34, and
3665    hence pre-allocation is perfect.
3666 
3667    Level: intermediate
3668 
3669 .keywords: matrix, aij, compressed row, sparse, parallel
3670 
3671 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3672           MPIAIJ, MatCreateMPIAIJWithArrays()
3673 @*/
3674 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3675 {
3676   PetscErrorCode ierr;
3677   PetscMPIInt    size;
3678 
3679   PetscFunctionBegin;
3680   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3681   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3682   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3683   if (size > 1) {
3684     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3685     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3686   } else {
3687     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3688     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3689   }
3690   PetscFunctionReturn(0);
3691 }
3692 
3693 #undef __FUNCT__
3694 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3695 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3696 {
3697   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3698 
3699   PetscFunctionBegin;
3700   if (Ad)     *Ad     = a->A;
3701   if (Ao)     *Ao     = a->B;
3702   if (colmap) *colmap = a->garray;
3703   PetscFunctionReturn(0);
3704 }
3705 
3706 #undef __FUNCT__
3707 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3708 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3709 {
3710   PetscErrorCode ierr;
3711   PetscInt       i;
3712   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3713 
3714   PetscFunctionBegin;
3715   if (coloring->ctype == IS_COLORING_GLOBAL) {
3716     ISColoringValue *allcolors,*colors;
3717     ISColoring      ocoloring;
3718 
3719     /* set coloring for diagonal portion */
3720     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3721 
3722     /* set coloring for off-diagonal portion */
3723     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
3724     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3725     for (i=0; i<a->B->cmap->n; i++) {
3726       colors[i] = allcolors[a->garray[i]];
3727     }
3728     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3729     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3730     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3731     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3732   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3733     ISColoringValue *colors;
3734     PetscInt        *larray;
3735     ISColoring      ocoloring;
3736 
3737     /* set coloring for diagonal portion */
3738     ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr);
3739     for (i=0; i<a->A->cmap->n; i++) {
3740       larray[i] = i + A->cmap->rstart;
3741     }
3742     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
3743     ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr);
3744     for (i=0; i<a->A->cmap->n; i++) {
3745       colors[i] = coloring->colors[larray[i]];
3746     }
3747     ierr = PetscFree(larray);CHKERRQ(ierr);
3748     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3749     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3750     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3751 
3752     /* set coloring for off-diagonal portion */
3753     ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr);
3754     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
3755     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3756     for (i=0; i<a->B->cmap->n; i++) {
3757       colors[i] = coloring->colors[larray[i]];
3758     }
3759     ierr = PetscFree(larray);CHKERRQ(ierr);
3760     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3761     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3762     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3763   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3764   PetscFunctionReturn(0);
3765 }
3766 
3767 #undef __FUNCT__
3768 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3769 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3770 {
3771   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3772   PetscErrorCode ierr;
3773 
3774   PetscFunctionBegin;
3775   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3776   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3777   PetscFunctionReturn(0);
3778 }
3779 
3780 #undef __FUNCT__
3781 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
3782 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3783 {
3784   PetscErrorCode ierr;
3785   PetscInt       m,N,i,rstart,nnz,Ii;
3786   PetscInt       *indx;
3787   PetscScalar    *values;
3788 
3789   PetscFunctionBegin;
3790   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3791   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3792     PetscInt       *dnz,*onz,sum,bs,cbs;
3793 
3794     if (n == PETSC_DECIDE) {
3795       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3796     }
3797     /* Check sum(n) = N */
3798     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3799     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
3800 
3801     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3802     rstart -= m;
3803 
3804     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3805     for (i=0; i<m; i++) {
3806       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3807       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3808       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3809     }
3810 
3811     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3812     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3813     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3814     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3815     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3816     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3817     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3818   }
3819 
3820   /* numeric phase */
3821   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3822   for (i=0; i<m; i++) {
3823     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3824     Ii   = i + rstart;
3825     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3826     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3827   }
3828   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3829   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3830   PetscFunctionReturn(0);
3831 }
3832 
3833 #undef __FUNCT__
3834 #define __FUNCT__ "MatFileSplit"
3835 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3836 {
3837   PetscErrorCode    ierr;
3838   PetscMPIInt       rank;
3839   PetscInt          m,N,i,rstart,nnz;
3840   size_t            len;
3841   const PetscInt    *indx;
3842   PetscViewer       out;
3843   char              *name;
3844   Mat               B;
3845   const PetscScalar *values;
3846 
3847   PetscFunctionBegin;
3848   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3849   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3850   /* Should this be the type of the diagonal block of A? */
3851   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3852   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3853   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3854   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3855   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3856   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3857   for (i=0; i<m; i++) {
3858     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3859     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3860     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3861   }
3862   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3863   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3864 
3865   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3866   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3867   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
3868   sprintf(name,"%s.%d",outfile,rank);
3869   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3870   ierr = PetscFree(name);CHKERRQ(ierr);
3871   ierr = MatView(B,out);CHKERRQ(ierr);
3872   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3873   ierr = MatDestroy(&B);CHKERRQ(ierr);
3874   PetscFunctionReturn(0);
3875 }
3876 
3877 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3878 #undef __FUNCT__
3879 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3880 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3881 {
3882   PetscErrorCode      ierr;
3883   Mat_Merge_SeqsToMPI *merge;
3884   PetscContainer      container;
3885 
3886   PetscFunctionBegin;
3887   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3888   if (container) {
3889     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3890     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3891     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3892     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3893     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3894     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3895     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3896     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3897     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3898     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3899     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3900     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3901     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3902     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3903     ierr = PetscFree(merge);CHKERRQ(ierr);
3904     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3905   }
3906   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3907   PetscFunctionReturn(0);
3908 }
3909 
3910 #include <../src/mat/utils/freespace.h>
3911 #include <petscbt.h>
3912 
3913 #undef __FUNCT__
3914 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
3915 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
3916 {
3917   PetscErrorCode      ierr;
3918   MPI_Comm            comm;
3919   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
3920   PetscMPIInt         size,rank,taga,*len_s;
3921   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
3922   PetscInt            proc,m;
3923   PetscInt            **buf_ri,**buf_rj;
3924   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3925   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
3926   MPI_Request         *s_waits,*r_waits;
3927   MPI_Status          *status;
3928   MatScalar           *aa=a->a;
3929   MatScalar           **abuf_r,*ba_i;
3930   Mat_Merge_SeqsToMPI *merge;
3931   PetscContainer      container;
3932 
3933   PetscFunctionBegin;
3934   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
3935   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3936 
3937   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3938   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3939 
3940   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3941   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3942 
3943   bi     = merge->bi;
3944   bj     = merge->bj;
3945   buf_ri = merge->buf_ri;
3946   buf_rj = merge->buf_rj;
3947 
3948   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
3949   owners = merge->rowmap->range;
3950   len_s  = merge->len_s;
3951 
3952   /* send and recv matrix values */
3953   /*-----------------------------*/
3954   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3955   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3956 
3957   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
3958   for (proc=0,k=0; proc<size; proc++) {
3959     if (!len_s[proc]) continue;
3960     i    = owners[proc];
3961     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3962     k++;
3963   }
3964 
3965   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3966   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3967   ierr = PetscFree(status);CHKERRQ(ierr);
3968 
3969   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3970   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3971 
3972   /* insert mat values of mpimat */
3973   /*----------------------------*/
3974   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
3975   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
3976 
3977   for (k=0; k<merge->nrecv; k++) {
3978     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3979     nrows       = *(buf_ri_k[k]);
3980     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3981     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
3982   }
3983 
3984   /* set values of ba */
3985   m = merge->rowmap->n;
3986   for (i=0; i<m; i++) {
3987     arow = owners[rank] + i;
3988     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3989     bnzi = bi[i+1] - bi[i];
3990     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
3991 
3992     /* add local non-zero vals of this proc's seqmat into ba */
3993     anzi   = ai[arow+1] - ai[arow];
3994     aj     = a->j + ai[arow];
3995     aa     = a->a + ai[arow];
3996     nextaj = 0;
3997     for (j=0; nextaj<anzi; j++) {
3998       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
3999         ba_i[j] += aa[nextaj++];
4000       }
4001     }
4002 
4003     /* add received vals into ba */
4004     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4005       /* i-th row */
4006       if (i == *nextrow[k]) {
4007         anzi   = *(nextai[k]+1) - *nextai[k];
4008         aj     = buf_rj[k] + *(nextai[k]);
4009         aa     = abuf_r[k] + *(nextai[k]);
4010         nextaj = 0;
4011         for (j=0; nextaj<anzi; j++) {
4012           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4013             ba_i[j] += aa[nextaj++];
4014           }
4015         }
4016         nextrow[k]++; nextai[k]++;
4017       }
4018     }
4019     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4020   }
4021   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4022   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4023 
4024   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4025   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4026   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4027   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4028   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4029   PetscFunctionReturn(0);
4030 }
4031 
4032 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4033 
4034 #undef __FUNCT__
4035 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4036 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4037 {
4038   PetscErrorCode      ierr;
4039   Mat                 B_mpi;
4040   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4041   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4042   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4043   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4044   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4045   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4046   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4047   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4048   MPI_Status          *status;
4049   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4050   PetscBT             lnkbt;
4051   Mat_Merge_SeqsToMPI *merge;
4052   PetscContainer      container;
4053 
4054   PetscFunctionBegin;
4055   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4056 
4057   /* make sure it is a PETSc comm */
4058   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4059   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4060   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4061 
4062   ierr = PetscNew(&merge);CHKERRQ(ierr);
4063   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4064 
4065   /* determine row ownership */
4066   /*---------------------------------------------------------*/
4067   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4068   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4069   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4070   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4071   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4072   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4073   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4074 
4075   m      = merge->rowmap->n;
4076   owners = merge->rowmap->range;
4077 
4078   /* determine the number of messages to send, their lengths */
4079   /*---------------------------------------------------------*/
4080   len_s = merge->len_s;
4081 
4082   len          = 0; /* length of buf_si[] */
4083   merge->nsend = 0;
4084   for (proc=0; proc<size; proc++) {
4085     len_si[proc] = 0;
4086     if (proc == rank) {
4087       len_s[proc] = 0;
4088     } else {
4089       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4090       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4091     }
4092     if (len_s[proc]) {
4093       merge->nsend++;
4094       nrows = 0;
4095       for (i=owners[proc]; i<owners[proc+1]; i++) {
4096         if (ai[i+1] > ai[i]) nrows++;
4097       }
4098       len_si[proc] = 2*(nrows+1);
4099       len         += len_si[proc];
4100     }
4101   }
4102 
4103   /* determine the number and length of messages to receive for ij-structure */
4104   /*-------------------------------------------------------------------------*/
4105   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4106   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4107 
4108   /* post the Irecv of j-structure */
4109   /*-------------------------------*/
4110   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4111   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4112 
4113   /* post the Isend of j-structure */
4114   /*--------------------------------*/
4115   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4116 
4117   for (proc=0, k=0; proc<size; proc++) {
4118     if (!len_s[proc]) continue;
4119     i    = owners[proc];
4120     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4121     k++;
4122   }
4123 
4124   /* receives and sends of j-structure are complete */
4125   /*------------------------------------------------*/
4126   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4127   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4128 
4129   /* send and recv i-structure */
4130   /*---------------------------*/
4131   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4132   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4133 
4134   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4135   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4136   for (proc=0,k=0; proc<size; proc++) {
4137     if (!len_s[proc]) continue;
4138     /* form outgoing message for i-structure:
4139          buf_si[0]:                 nrows to be sent
4140                [1:nrows]:           row index (global)
4141                [nrows+1:2*nrows+1]: i-structure index
4142     */
4143     /*-------------------------------------------*/
4144     nrows       = len_si[proc]/2 - 1;
4145     buf_si_i    = buf_si + nrows+1;
4146     buf_si[0]   = nrows;
4147     buf_si_i[0] = 0;
4148     nrows       = 0;
4149     for (i=owners[proc]; i<owners[proc+1]; i++) {
4150       anzi = ai[i+1] - ai[i];
4151       if (anzi) {
4152         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4153         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4154         nrows++;
4155       }
4156     }
4157     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4158     k++;
4159     buf_si += len_si[proc];
4160   }
4161 
4162   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4163   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4164 
4165   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4166   for (i=0; i<merge->nrecv; i++) {
4167     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4168   }
4169 
4170   ierr = PetscFree(len_si);CHKERRQ(ierr);
4171   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4172   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4173   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4174   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4175   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4176   ierr = PetscFree(status);CHKERRQ(ierr);
4177 
4178   /* compute a local seq matrix in each processor */
4179   /*----------------------------------------------*/
4180   /* allocate bi array and free space for accumulating nonzero column info */
4181   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4182   bi[0] = 0;
4183 
4184   /* create and initialize a linked list */
4185   nlnk = N+1;
4186   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4187 
4188   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4189   len  = ai[owners[rank+1]] - ai[owners[rank]];
4190   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4191 
4192   current_space = free_space;
4193 
4194   /* determine symbolic info for each local row */
4195   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4196 
4197   for (k=0; k<merge->nrecv; k++) {
4198     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4199     nrows       = *buf_ri_k[k];
4200     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4201     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4202   }
4203 
4204   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4205   len  = 0;
4206   for (i=0; i<m; i++) {
4207     bnzi = 0;
4208     /* add local non-zero cols of this proc's seqmat into lnk */
4209     arow  = owners[rank] + i;
4210     anzi  = ai[arow+1] - ai[arow];
4211     aj    = a->j + ai[arow];
4212     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4213     bnzi += nlnk;
4214     /* add received col data into lnk */
4215     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4216       if (i == *nextrow[k]) { /* i-th row */
4217         anzi  = *(nextai[k]+1) - *nextai[k];
4218         aj    = buf_rj[k] + *nextai[k];
4219         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4220         bnzi += nlnk;
4221         nextrow[k]++; nextai[k]++;
4222       }
4223     }
4224     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4225 
4226     /* if free space is not available, make more free space */
4227     if (current_space->local_remaining<bnzi) {
4228       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4229       nspacedouble++;
4230     }
4231     /* copy data into free space, then initialize lnk */
4232     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4233     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4234 
4235     current_space->array           += bnzi;
4236     current_space->local_used      += bnzi;
4237     current_space->local_remaining -= bnzi;
4238 
4239     bi[i+1] = bi[i] + bnzi;
4240   }
4241 
4242   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4243 
4244   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4245   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4246   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4247 
4248   /* create symbolic parallel matrix B_mpi */
4249   /*---------------------------------------*/
4250   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4251   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4252   if (n==PETSC_DECIDE) {
4253     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4254   } else {
4255     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4256   }
4257   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4258   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4259   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4260   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4261   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4262 
4263   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4264   B_mpi->assembled    = PETSC_FALSE;
4265   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4266   merge->bi           = bi;
4267   merge->bj           = bj;
4268   merge->buf_ri       = buf_ri;
4269   merge->buf_rj       = buf_rj;
4270   merge->coi          = NULL;
4271   merge->coj          = NULL;
4272   merge->owners_co    = NULL;
4273 
4274   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4275 
4276   /* attach the supporting struct to B_mpi for reuse */
4277   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4278   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4279   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4280   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4281   *mpimat = B_mpi;
4282 
4283   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4284   PetscFunctionReturn(0);
4285 }
4286 
4287 #undef __FUNCT__
4288 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4289 /*@C
4290       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4291                  matrices from each processor
4292 
4293     Collective on MPI_Comm
4294 
4295    Input Parameters:
4296 +    comm - the communicators the parallel matrix will live on
4297 .    seqmat - the input sequential matrices
4298 .    m - number of local rows (or PETSC_DECIDE)
4299 .    n - number of local columns (or PETSC_DECIDE)
4300 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4301 
4302    Output Parameter:
4303 .    mpimat - the parallel matrix generated
4304 
4305     Level: advanced
4306 
4307    Notes:
4308      The dimensions of the sequential matrix in each processor MUST be the same.
4309      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4310      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4311 @*/
4312 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4313 {
4314   PetscErrorCode ierr;
4315   PetscMPIInt    size;
4316 
4317   PetscFunctionBegin;
4318   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4319   if (size == 1) {
4320     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4321     if (scall == MAT_INITIAL_MATRIX) {
4322       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4323     } else {
4324       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4325     }
4326     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4327     PetscFunctionReturn(0);
4328   }
4329   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4330   if (scall == MAT_INITIAL_MATRIX) {
4331     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4332   }
4333   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4334   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4335   PetscFunctionReturn(0);
4336 }
4337 
4338 #undef __FUNCT__
4339 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4340 /*@
4341      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4342           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4343           with MatGetSize()
4344 
4345     Not Collective
4346 
4347    Input Parameters:
4348 +    A - the matrix
4349 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4350 
4351    Output Parameter:
4352 .    A_loc - the local sequential matrix generated
4353 
4354     Level: developer
4355 
4356 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4357 
4358 @*/
4359 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4360 {
4361   PetscErrorCode ierr;
4362   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4363   Mat_SeqAIJ     *mat,*a,*b;
4364   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4365   MatScalar      *aa,*ba,*cam;
4366   PetscScalar    *ca;
4367   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4368   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4369   PetscBool      match;
4370   MPI_Comm       comm;
4371   PetscMPIInt    size;
4372 
4373   PetscFunctionBegin;
4374   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4375   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4376   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4377   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4378   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4379 
4380   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4381   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4382   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4383   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4384   aa = a->a; ba = b->a;
4385   if (scall == MAT_INITIAL_MATRIX) {
4386     if (size == 1) {
4387       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4388       PetscFunctionReturn(0);
4389     }
4390 
4391     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4392     ci[0] = 0;
4393     for (i=0; i<am; i++) {
4394       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4395     }
4396     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4397     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4398     k    = 0;
4399     for (i=0; i<am; i++) {
4400       ncols_o = bi[i+1] - bi[i];
4401       ncols_d = ai[i+1] - ai[i];
4402       /* off-diagonal portion of A */
4403       for (jo=0; jo<ncols_o; jo++) {
4404         col = cmap[*bj];
4405         if (col >= cstart) break;
4406         cj[k]   = col; bj++;
4407         ca[k++] = *ba++;
4408       }
4409       /* diagonal portion of A */
4410       for (j=0; j<ncols_d; j++) {
4411         cj[k]   = cstart + *aj++;
4412         ca[k++] = *aa++;
4413       }
4414       /* off-diagonal portion of A */
4415       for (j=jo; j<ncols_o; j++) {
4416         cj[k]   = cmap[*bj++];
4417         ca[k++] = *ba++;
4418       }
4419     }
4420     /* put together the new matrix */
4421     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4422     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4423     /* Since these are PETSc arrays, change flags to free them as necessary. */
4424     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4425     mat->free_a  = PETSC_TRUE;
4426     mat->free_ij = PETSC_TRUE;
4427     mat->nonew   = 0;
4428   } else if (scall == MAT_REUSE_MATRIX) {
4429     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4430     ci = mat->i; cj = mat->j; cam = mat->a;
4431     for (i=0; i<am; i++) {
4432       /* off-diagonal portion of A */
4433       ncols_o = bi[i+1] - bi[i];
4434       for (jo=0; jo<ncols_o; jo++) {
4435         col = cmap[*bj];
4436         if (col >= cstart) break;
4437         *cam++ = *ba++; bj++;
4438       }
4439       /* diagonal portion of A */
4440       ncols_d = ai[i+1] - ai[i];
4441       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4442       /* off-diagonal portion of A */
4443       for (j=jo; j<ncols_o; j++) {
4444         *cam++ = *ba++; bj++;
4445       }
4446     }
4447   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4448   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4449   PetscFunctionReturn(0);
4450 }
4451 
4452 #undef __FUNCT__
4453 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4454 /*@C
4455      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4456 
4457     Not Collective
4458 
4459    Input Parameters:
4460 +    A - the matrix
4461 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4462 -    row, col - index sets of rows and columns to extract (or NULL)
4463 
4464    Output Parameter:
4465 .    A_loc - the local sequential matrix generated
4466 
4467     Level: developer
4468 
4469 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4470 
4471 @*/
4472 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4473 {
4474   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4475   PetscErrorCode ierr;
4476   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4477   IS             isrowa,iscola;
4478   Mat            *aloc;
4479   PetscBool      match;
4480 
4481   PetscFunctionBegin;
4482   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4483   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4484   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4485   if (!row) {
4486     start = A->rmap->rstart; end = A->rmap->rend;
4487     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4488   } else {
4489     isrowa = *row;
4490   }
4491   if (!col) {
4492     start = A->cmap->rstart;
4493     cmap  = a->garray;
4494     nzA   = a->A->cmap->n;
4495     nzB   = a->B->cmap->n;
4496     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4497     ncols = 0;
4498     for (i=0; i<nzB; i++) {
4499       if (cmap[i] < start) idx[ncols++] = cmap[i];
4500       else break;
4501     }
4502     imark = i;
4503     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4504     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4505     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4506   } else {
4507     iscola = *col;
4508   }
4509   if (scall != MAT_INITIAL_MATRIX) {
4510     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4511     aloc[0] = *A_loc;
4512   }
4513   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4514   *A_loc = aloc[0];
4515   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4516   if (!row) {
4517     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4518   }
4519   if (!col) {
4520     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4521   }
4522   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4523   PetscFunctionReturn(0);
4524 }
4525 
4526 #undef __FUNCT__
4527 #define __FUNCT__ "MatGetBrowsOfAcols"
4528 /*@C
4529     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4530 
4531     Collective on Mat
4532 
4533    Input Parameters:
4534 +    A,B - the matrices in mpiaij format
4535 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4536 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4537 
4538    Output Parameter:
4539 +    rowb, colb - index sets of rows and columns of B to extract
4540 -    B_seq - the sequential matrix generated
4541 
4542     Level: developer
4543 
4544 @*/
4545 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4546 {
4547   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4548   PetscErrorCode ierr;
4549   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4550   IS             isrowb,iscolb;
4551   Mat            *bseq=NULL;
4552 
4553   PetscFunctionBegin;
4554   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4555     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4556   }
4557   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4558 
4559   if (scall == MAT_INITIAL_MATRIX) {
4560     start = A->cmap->rstart;
4561     cmap  = a->garray;
4562     nzA   = a->A->cmap->n;
4563     nzB   = a->B->cmap->n;
4564     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4565     ncols = 0;
4566     for (i=0; i<nzB; i++) {  /* row < local row index */
4567       if (cmap[i] < start) idx[ncols++] = cmap[i];
4568       else break;
4569     }
4570     imark = i;
4571     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4572     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4573     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4574     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4575   } else {
4576     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4577     isrowb  = *rowb; iscolb = *colb;
4578     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4579     bseq[0] = *B_seq;
4580   }
4581   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4582   *B_seq = bseq[0];
4583   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4584   if (!rowb) {
4585     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4586   } else {
4587     *rowb = isrowb;
4588   }
4589   if (!colb) {
4590     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4591   } else {
4592     *colb = iscolb;
4593   }
4594   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4595   PetscFunctionReturn(0);
4596 }
4597 
4598 #undef __FUNCT__
4599 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4600 /*
4601     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4602     of the OFF-DIAGONAL portion of local A
4603 
4604     Collective on Mat
4605 
4606    Input Parameters:
4607 +    A,B - the matrices in mpiaij format
4608 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4609 
4610    Output Parameter:
4611 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4612 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4613 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4614 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4615 
4616     Level: developer
4617 
4618 */
4619 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4620 {
4621   VecScatter_MPI_General *gen_to,*gen_from;
4622   PetscErrorCode         ierr;
4623   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4624   Mat_SeqAIJ             *b_oth;
4625   VecScatter             ctx =a->Mvctx;
4626   MPI_Comm               comm;
4627   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4628   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4629   PetscScalar            *rvalues,*svalues;
4630   MatScalar              *b_otha,*bufa,*bufA;
4631   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4632   MPI_Request            *rwaits = NULL,*swaits = NULL;
4633   MPI_Status             *sstatus,rstatus;
4634   PetscMPIInt            jj,size;
4635   PetscInt               *cols,sbs,rbs;
4636   PetscScalar            *vals;
4637 
4638   PetscFunctionBegin;
4639   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4640   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4641 
4642   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4643     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4644   }
4645   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4646   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4647 
4648   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4649   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4650   rvalues  = gen_from->values; /* holds the length of receiving row */
4651   svalues  = gen_to->values;   /* holds the length of sending row */
4652   nrecvs   = gen_from->n;
4653   nsends   = gen_to->n;
4654 
4655   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4656   srow    = gen_to->indices;    /* local row index to be sent */
4657   sstarts = gen_to->starts;
4658   sprocs  = gen_to->procs;
4659   sstatus = gen_to->sstatus;
4660   sbs     = gen_to->bs;
4661   rstarts = gen_from->starts;
4662   rprocs  = gen_from->procs;
4663   rbs     = gen_from->bs;
4664 
4665   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4666   if (scall == MAT_INITIAL_MATRIX) {
4667     /* i-array */
4668     /*---------*/
4669     /*  post receives */
4670     for (i=0; i<nrecvs; i++) {
4671       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4672       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4673       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4674     }
4675 
4676     /* pack the outgoing message */
4677     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4678 
4679     sstartsj[0] = 0;
4680     rstartsj[0] = 0;
4681     len         = 0; /* total length of j or a array to be sent */
4682     k           = 0;
4683     for (i=0; i<nsends; i++) {
4684       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4685       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4686       for (j=0; j<nrows; j++) {
4687         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4688         for (l=0; l<sbs; l++) {
4689           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4690 
4691           rowlen[j*sbs+l] = ncols;
4692 
4693           len += ncols;
4694           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4695         }
4696         k++;
4697       }
4698       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4699 
4700       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4701     }
4702     /* recvs and sends of i-array are completed */
4703     i = nrecvs;
4704     while (i--) {
4705       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4706     }
4707     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4708 
4709     /* allocate buffers for sending j and a arrays */
4710     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4711     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4712 
4713     /* create i-array of B_oth */
4714     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4715 
4716     b_othi[0] = 0;
4717     len       = 0; /* total length of j or a array to be received */
4718     k         = 0;
4719     for (i=0; i<nrecvs; i++) {
4720       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4721       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4722       for (j=0; j<nrows; j++) {
4723         b_othi[k+1] = b_othi[k] + rowlen[j];
4724         len        += rowlen[j]; k++;
4725       }
4726       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4727     }
4728 
4729     /* allocate space for j and a arrrays of B_oth */
4730     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4731     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4732 
4733     /* j-array */
4734     /*---------*/
4735     /*  post receives of j-array */
4736     for (i=0; i<nrecvs; i++) {
4737       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4738       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4739     }
4740 
4741     /* pack the outgoing message j-array */
4742     k = 0;
4743     for (i=0; i<nsends; i++) {
4744       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4745       bufJ  = bufj+sstartsj[i];
4746       for (j=0; j<nrows; j++) {
4747         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4748         for (ll=0; ll<sbs; ll++) {
4749           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4750           for (l=0; l<ncols; l++) {
4751             *bufJ++ = cols[l];
4752           }
4753           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4754         }
4755       }
4756       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4757     }
4758 
4759     /* recvs and sends of j-array are completed */
4760     i = nrecvs;
4761     while (i--) {
4762       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4763     }
4764     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4765   } else if (scall == MAT_REUSE_MATRIX) {
4766     sstartsj = *startsj_s;
4767     rstartsj = *startsj_r;
4768     bufa     = *bufa_ptr;
4769     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4770     b_otha   = b_oth->a;
4771   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4772 
4773   /* a-array */
4774   /*---------*/
4775   /*  post receives of a-array */
4776   for (i=0; i<nrecvs; i++) {
4777     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4778     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4779   }
4780 
4781   /* pack the outgoing message a-array */
4782   k = 0;
4783   for (i=0; i<nsends; i++) {
4784     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4785     bufA  = bufa+sstartsj[i];
4786     for (j=0; j<nrows; j++) {
4787       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4788       for (ll=0; ll<sbs; ll++) {
4789         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4790         for (l=0; l<ncols; l++) {
4791           *bufA++ = vals[l];
4792         }
4793         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4794       }
4795     }
4796     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4797   }
4798   /* recvs and sends of a-array are completed */
4799   i = nrecvs;
4800   while (i--) {
4801     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4802   }
4803   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4804   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4805 
4806   if (scall == MAT_INITIAL_MATRIX) {
4807     /* put together the new matrix */
4808     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4809 
4810     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4811     /* Since these are PETSc arrays, change flags to free them as necessary. */
4812     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4813     b_oth->free_a  = PETSC_TRUE;
4814     b_oth->free_ij = PETSC_TRUE;
4815     b_oth->nonew   = 0;
4816 
4817     ierr = PetscFree(bufj);CHKERRQ(ierr);
4818     if (!startsj_s || !bufa_ptr) {
4819       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4820       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4821     } else {
4822       *startsj_s = sstartsj;
4823       *startsj_r = rstartsj;
4824       *bufa_ptr  = bufa;
4825     }
4826   }
4827   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4828   PetscFunctionReturn(0);
4829 }
4830 
4831 #undef __FUNCT__
4832 #define __FUNCT__ "MatGetCommunicationStructs"
4833 /*@C
4834   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4835 
4836   Not Collective
4837 
4838   Input Parameters:
4839 . A - The matrix in mpiaij format
4840 
4841   Output Parameter:
4842 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4843 . colmap - A map from global column index to local index into lvec
4844 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4845 
4846   Level: developer
4847 
4848 @*/
4849 #if defined(PETSC_USE_CTABLE)
4850 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4851 #else
4852 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4853 #endif
4854 {
4855   Mat_MPIAIJ *a;
4856 
4857   PetscFunctionBegin;
4858   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4859   PetscValidPointer(lvec, 2);
4860   PetscValidPointer(colmap, 3);
4861   PetscValidPointer(multScatter, 4);
4862   a = (Mat_MPIAIJ*) A->data;
4863   if (lvec) *lvec = a->lvec;
4864   if (colmap) *colmap = a->colmap;
4865   if (multScatter) *multScatter = a->Mvctx;
4866   PetscFunctionReturn(0);
4867 }
4868 
4869 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4870 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4871 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4872 #if defined(PETSC_HAVE_ELEMENTAL)
4873 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4874 #endif
4875 
4876 #undef __FUNCT__
4877 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4878 /*
4879     Computes (B'*A')' since computing B*A directly is untenable
4880 
4881                n                       p                          p
4882         (              )       (              )         (                  )
4883       m (      A       )  *  n (       B      )   =   m (         C        )
4884         (              )       (              )         (                  )
4885 
4886 */
4887 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4888 {
4889   PetscErrorCode ierr;
4890   Mat            At,Bt,Ct;
4891 
4892   PetscFunctionBegin;
4893   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4894   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4895   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4896   ierr = MatDestroy(&At);CHKERRQ(ierr);
4897   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4898   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4899   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4900   PetscFunctionReturn(0);
4901 }
4902 
4903 #undef __FUNCT__
4904 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4905 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4906 {
4907   PetscErrorCode ierr;
4908   PetscInt       m=A->rmap->n,n=B->cmap->n;
4909   Mat            Cmat;
4910 
4911   PetscFunctionBegin;
4912   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4913   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
4914   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4915   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
4916   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4917   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
4918   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4919   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4920 
4921   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
4922 
4923   *C = Cmat;
4924   PetscFunctionReturn(0);
4925 }
4926 
4927 /* ----------------------------------------------------------------*/
4928 #undef __FUNCT__
4929 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4930 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4931 {
4932   PetscErrorCode ierr;
4933 
4934   PetscFunctionBegin;
4935   if (scall == MAT_INITIAL_MATRIX) {
4936     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4937     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4938     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4939   }
4940   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4941   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4942   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4943   PetscFunctionReturn(0);
4944 }
4945 
4946 /*MC
4947    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4948 
4949    Options Database Keys:
4950 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4951 
4952   Level: beginner
4953 
4954 .seealso: MatCreateAIJ()
4955 M*/
4956 
4957 #undef __FUNCT__
4958 #define __FUNCT__ "MatCreate_MPIAIJ"
4959 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
4960 {
4961   Mat_MPIAIJ     *b;
4962   PetscErrorCode ierr;
4963   PetscMPIInt    size;
4964 
4965   PetscFunctionBegin;
4966   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
4967 
4968   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
4969   B->data       = (void*)b;
4970   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4971   B->assembled  = PETSC_FALSE;
4972   B->insertmode = NOT_SET_VALUES;
4973   b->size       = size;
4974 
4975   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
4976 
4977   /* build cache for off array entries formed */
4978   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
4979 
4980   b->donotstash  = PETSC_FALSE;
4981   b->colmap      = 0;
4982   b->garray      = 0;
4983   b->roworiented = PETSC_TRUE;
4984 
4985   /* stuff used for matrix vector multiply */
4986   b->lvec  = NULL;
4987   b->Mvctx = NULL;
4988 
4989   /* stuff for MatGetRow() */
4990   b->rowindices   = 0;
4991   b->rowvalues    = 0;
4992   b->getrowactive = PETSC_FALSE;
4993 
4994   /* flexible pointer used in CUSP/CUSPARSE classes */
4995   b->spptr = NULL;
4996 
4997   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4998   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4999   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5000   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5001   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5002   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5003   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5004   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5005   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5006   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5007 #if defined(PETSC_HAVE_ELEMENTAL)
5008   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5009 #endif
5010   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5011   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5012   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5013   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5014   PetscFunctionReturn(0);
5015 }
5016 
5017 #undef __FUNCT__
5018 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5019 /*@C
5020      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5021          and "off-diagonal" part of the matrix in CSR format.
5022 
5023    Collective on MPI_Comm
5024 
5025    Input Parameters:
5026 +  comm - MPI communicator
5027 .  m - number of local rows (Cannot be PETSC_DECIDE)
5028 .  n - This value should be the same as the local size used in creating the
5029        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5030        calculated if N is given) For square matrices n is almost always m.
5031 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5032 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5033 .   i - row indices for "diagonal" portion of matrix
5034 .   j - column indices
5035 .   a - matrix values
5036 .   oi - row indices for "off-diagonal" portion of matrix
5037 .   oj - column indices
5038 -   oa - matrix values
5039 
5040    Output Parameter:
5041 .   mat - the matrix
5042 
5043    Level: advanced
5044 
5045    Notes:
5046        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5047        must free the arrays once the matrix has been destroyed and not before.
5048 
5049        The i and j indices are 0 based
5050 
5051        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5052 
5053        This sets local rows and cannot be used to set off-processor values.
5054 
5055        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5056        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5057        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5058        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5059        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5060        communication if it is known that only local entries will be set.
5061 
5062 .keywords: matrix, aij, compressed row, sparse, parallel
5063 
5064 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5065           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5066 @*/
5067 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5068 {
5069   PetscErrorCode ierr;
5070   Mat_MPIAIJ     *maij;
5071 
5072   PetscFunctionBegin;
5073   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5074   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5075   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5076   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5077   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5078   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5079   maij = (Mat_MPIAIJ*) (*mat)->data;
5080 
5081   (*mat)->preallocated = PETSC_TRUE;
5082 
5083   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5084   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5085 
5086   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5087   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5088 
5089   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5090   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5091   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5092   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5093 
5094   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5095   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5096   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5097   PetscFunctionReturn(0);
5098 }
5099 
5100 /*
5101     Special version for direct calls from Fortran
5102 */
5103 #include <petsc/private/fortranimpl.h>
5104 
5105 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5106 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5107 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5108 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5109 #endif
5110 
5111 /* Change these macros so can be used in void function */
5112 #undef CHKERRQ
5113 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5114 #undef SETERRQ2
5115 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5116 #undef SETERRQ3
5117 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5118 #undef SETERRQ
5119 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5120 
5121 #undef __FUNCT__
5122 #define __FUNCT__ "matsetvaluesmpiaij_"
5123 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5124 {
5125   Mat            mat  = *mmat;
5126   PetscInt       m    = *mm, n = *mn;
5127   InsertMode     addv = *maddv;
5128   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5129   PetscScalar    value;
5130   PetscErrorCode ierr;
5131 
5132   MatCheckPreallocated(mat,1);
5133   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5134 
5135 #if defined(PETSC_USE_DEBUG)
5136   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5137 #endif
5138   {
5139     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5140     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5141     PetscBool roworiented = aij->roworiented;
5142 
5143     /* Some Variables required in the macro */
5144     Mat        A                 = aij->A;
5145     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5146     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5147     MatScalar  *aa               = a->a;
5148     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5149     Mat        B                 = aij->B;
5150     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5151     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5152     MatScalar  *ba               = b->a;
5153 
5154     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5155     PetscInt  nonew = a->nonew;
5156     MatScalar *ap1,*ap2;
5157 
5158     PetscFunctionBegin;
5159     for (i=0; i<m; i++) {
5160       if (im[i] < 0) continue;
5161 #if defined(PETSC_USE_DEBUG)
5162       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5163 #endif
5164       if (im[i] >= rstart && im[i] < rend) {
5165         row      = im[i] - rstart;
5166         lastcol1 = -1;
5167         rp1      = aj + ai[row];
5168         ap1      = aa + ai[row];
5169         rmax1    = aimax[row];
5170         nrow1    = ailen[row];
5171         low1     = 0;
5172         high1    = nrow1;
5173         lastcol2 = -1;
5174         rp2      = bj + bi[row];
5175         ap2      = ba + bi[row];
5176         rmax2    = bimax[row];
5177         nrow2    = bilen[row];
5178         low2     = 0;
5179         high2    = nrow2;
5180 
5181         for (j=0; j<n; j++) {
5182           if (roworiented) value = v[i*n+j];
5183           else value = v[i+j*m];
5184           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5185           if (in[j] >= cstart && in[j] < cend) {
5186             col = in[j] - cstart;
5187             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5188           } else if (in[j] < 0) continue;
5189 #if defined(PETSC_USE_DEBUG)
5190           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5191 #endif
5192           else {
5193             if (mat->was_assembled) {
5194               if (!aij->colmap) {
5195                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5196               }
5197 #if defined(PETSC_USE_CTABLE)
5198               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5199               col--;
5200 #else
5201               col = aij->colmap[in[j]] - 1;
5202 #endif
5203               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5204                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5205                 col  =  in[j];
5206                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5207                 B     = aij->B;
5208                 b     = (Mat_SeqAIJ*)B->data;
5209                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5210                 rp2   = bj + bi[row];
5211                 ap2   = ba + bi[row];
5212                 rmax2 = bimax[row];
5213                 nrow2 = bilen[row];
5214                 low2  = 0;
5215                 high2 = nrow2;
5216                 bm    = aij->B->rmap->n;
5217                 ba    = b->a;
5218               }
5219             } else col = in[j];
5220             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5221           }
5222         }
5223       } else if (!aij->donotstash) {
5224         if (roworiented) {
5225           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5226         } else {
5227           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5228         }
5229       }
5230     }
5231   }
5232   PetscFunctionReturnVoid();
5233 }
5234 
5235