xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision de196d0083ff058e71880dfa38e0c07d4e33cbfb)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise.  As a result, for single process communicators,
13   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
14   for communicators controlling multiple processes.  It is recommended that you call both of
15   the above preallocation routines for simplicity.
16 
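   Example Usage:
   A minimal sketch (the preallocation counts 5 and 2 are illustrative only; comm, m, n, M, N
   stand for the communicator and the local/global sizes):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,5,NULL);          /* used when comm has a single process */
   MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);   /* used when comm has multiple processes */
.ve
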
17    Options Database Keys:
18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19 
20   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically
21    switches over to use inodes when enough exist.
22 
23   Level: beginner
24 
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
26 M*/
27 
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30 
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34   for communicators controlling multiple processes.  It is recommended that you call both of
35   the above preallocation routines for simplicity.
36 
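   Example Usage:
   A minimal sketch, assuming A has already been created with MatCreate() and sized:
.vb
   MatSetType(A,MATAIJCRL);   /* or select it at runtime with -mat_type aijcrl */
.ve
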
37    Options Database Keys:
38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39 
40   Level: beginner
41 
42 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
48 {
49   PetscErrorCode  ierr;
50   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
51   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
52   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
53   const PetscInt  *ia,*ib;
54   const MatScalar *aa,*bb;
55   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
56   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
57 
58   PetscFunctionBegin;
59   *keptrows = 0;
60   ia        = a->i;
61   ib        = b->i;
62   for (i=0; i<m; i++) {
63     na = ia[i+1] - ia[i];
64     nb = ib[i+1] - ib[i];
65     if (!na && !nb) {
66       cnt++;
67       goto ok1;
68     }
69     aa = a->a + ia[i];
70     for (j=0; j<na; j++) {
71       if (aa[j] != 0.0) goto ok1;
72     }
73     bb = b->a + ib[i];
74     for (j=0; j <nb; j++) {
75       if (bb[j] != 0.0) goto ok1;
76     }
77     cnt++;
78 ok1:;
79   }
80   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
81   if (!n0rows) PetscFunctionReturn(0);
82   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
83   cnt  = 0;
84   for (i=0; i<m; i++) {
85     na = ia[i+1] - ia[i];
86     nb = ib[i+1] - ib[i];
87     if (!na && !nb) continue;
88     aa = a->a + ia[i];
89     for (j=0; j<na;j++) {
90       if (aa[j] != 0.0) {
91         rows[cnt++] = rstart + i;
92         goto ok2;
93       }
94     }
95     bb = b->a + ib[i];
96     for (j=0; j<nb; j++) {
97       if (bb[j] != 0.0) {
98         rows[cnt++] = rstart + i;
99         goto ok2;
100       }
101     }
102 ok2:;
103   }
104   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
110 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
111 {
112   PetscErrorCode    ierr;
113   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
114 
115   PetscFunctionBegin;
116   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
117     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
118   } else {
119     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
128 {
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
130   PetscErrorCode ierr;
131   PetscInt       i,rstart,nrows,*rows;
132 
133   PetscFunctionBegin;
134   *zrows = NULL;
135   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
136   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
137   for (i=0; i<nrows; i++) rows[i] += rstart;
138   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
145 {
146   PetscErrorCode ierr;
147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
148   PetscInt       i,n,*garray = aij->garray;
149   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
150   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
151   PetscReal      *work;
152 
153   PetscFunctionBegin;
154   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
155   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
156   if (type == NORM_2) {
157     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
158       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
159     }
160     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
161       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
162     }
163   } else if (type == NORM_1) {
164     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
165       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
166     }
167     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
168       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
169     }
170   } else if (type == NORM_INFINITY) {
171     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
172       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
173     }
174     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
175       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
176     }
177 
178   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
179   if (type == NORM_INFINITY) {
180     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
181   } else {
182     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
183   }
184   ierr = PetscFree(work);CHKERRQ(ierr);
185   if (type == NORM_2) {
186     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
194 {
195   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
196   IS              sis,gis;
197   PetscErrorCode  ierr;
198   const PetscInt  *isis,*igis;
199   PetscInt        n,*iis,nsis,ngis,rstart,i;
200 
201   PetscFunctionBegin;
202   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
203   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
204   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
205   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
206   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
207   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
208 
209   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
210   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
211   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
212   n    = ngis + nsis;
213   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
214   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
215   for (i=0; i<n; i++) iis[i] += rstart;
216   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
217 
218   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
219   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
220   ierr = ISDestroy(&sis);CHKERRQ(ierr);
221   ierr = ISDestroy(&gis);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
225 #undef __FUNCT__
226 #define __FUNCT__ "MatDistribute_MPIAIJ"
227 /*
228     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
229     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
230 
231     Only for square matrices
232 
233     Used by a preconditioner, hence PETSC_EXTERN
234 */
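/*
   A minimal calling sketch (illustrative; mloc is the number of rows the calling process
   is to own and gmat is the sequential matrix being distributed):

     Mat dmat;
     MatDistribute_MPIAIJ(comm,gmat,mloc,MAT_INITIAL_MATRIX,&dmat);
*/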
235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
236 {
237   PetscMPIInt    rank,size;
238   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
239   PetscErrorCode ierr;
240   Mat            mat;
241   Mat_SeqAIJ     *gmata;
242   PetscMPIInt    tag;
243   MPI_Status     status;
244   PetscBool      aij;
245   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
246 
247   PetscFunctionBegin;
248   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
249   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
250   if (!rank) {
251     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
252     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
253   }
254   if (reuse == MAT_INITIAL_MATRIX) {
255     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
256     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
257     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
258     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
259     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
260     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
261     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
262     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
263     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
264 
265     rowners[0] = 0;
266     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
267     rstart = rowners[rank];
268     rend   = rowners[rank+1];
269     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
270     if (!rank) {
271       gmata = (Mat_SeqAIJ*) gmat->data;
272       /* send row lengths to all processors */
273       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
274       for (i=1; i<size; i++) {
275         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
276       }
277       /* determine the number of diagonal and off-diagonal entries in each row */
278       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
279       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
280       jj   = 0;
281       for (i=0; i<m; i++) {
282         for (j=0; j<dlens[i]; j++) {
283           if (gmata->j[jj] < rstart) ld[i]++;
284           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
285           jj++;
286         }
287       }
288       /* send column indices to other processes */
289       for (i=1; i<size; i++) {
290         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
291         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
292         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293       }
294 
295       /* send numerical values to other processes */
296       for (i=1; i<size; i++) {
297         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
298         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
299       }
300       gmataa = gmata->a;
301       gmataj = gmata->j;
302 
303     } else {
304       /* receive row lengths */
305       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
306       /* receive column indices */
307       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
308       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
309       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* determine the number of diagonal and off-diagonal entries in each row */
311       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
312       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
313       jj   = 0;
314       for (i=0; i<m; i++) {
315         for (j=0; j<dlens[i]; j++) {
316           if (gmataj[jj] < rstart) ld[i]++;
317           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
318           jj++;
319         }
320       }
321       /* receive numerical values */
322       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* set preallocation */
326     for (i=0; i<m; i++) {
327       dlens[i] -= olens[i];
328     }
329     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
330     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
331 
332     for (i=0; i<m; i++) {
333       dlens[i] += olens[i];
334     }
335     cnt = 0;
336     for (i=0; i<m; i++) {
337       row  = rstart + i;
338       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
339       cnt += dlens[i];
340     }
341     if (rank) {
342       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
343     }
344     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
345     ierr = PetscFree(rowners);CHKERRQ(ierr);
346 
347     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
348 
349     *inmat = mat;
350   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
351     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
352     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
353     mat  = *inmat;
354     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
355     if (!rank) {
356       /* send numerical values to other processes */
357       gmata  = (Mat_SeqAIJ*) gmat->data;
358       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
359       gmataa = gmata->a;
360       for (i=1; i<size; i++) {
361         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
362         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
363       }
364       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
365     } else {
366       /* receive numerical values from process 0 */
367       nz   = Ad->nz + Ao->nz;
368       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
369       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
370     }
371     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
372     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
373     ad = Ad->a;
374     ao = Ao->a;
375     if (mat->rmap->n) {
376       i  = 0;
377       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
378       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
379     }
380     for (i=1; i<mat->rmap->n; i++) {
381       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     i--;
385     if (mat->rmap->n) {
386       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
387     }
388     if (rank) {
389       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
390     }
391   }
392   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
393   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 /*
398   Local utility routine that creates a mapping from the global column
399 number to the local number in the off-diagonal part of the local
400 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
401 a slightly higher hash table cost; without it, it is not scalable (each process
402 has an order N integer array) but is fast to access.
403 */
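/*
   A minimal lookup sketch (illustrative only), mirroring how aij->colmap is consulted in
   MatSetValues_MPIAIJ() below to translate a global column gcol into a local column of the
   off-diagonal block (a resulting lcol < 0 means gcol is not present in that block):

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     PetscTableFind(aij->colmap,gcol+1,&lcol); lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/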
404 #undef __FUNCT__
405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
407 {
408   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscInt       n = aij->B->cmap->n,i;
411 
412   PetscFunctionBegin;
413   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
414 #if defined(PETSC_USE_CTABLE)
415   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
416   for (i=0; i<n; i++) {
417     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
418   }
419 #else
420   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
421   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
422   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
423 #endif
424   PetscFunctionReturn(0);
425 }
426 
427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
428 { \
429     if (col <= lastcol1)  low1 = 0;     \
430     else                 high1 = nrow1; \
431     lastcol1 = col;\
432     while (high1-low1 > 5) { \
433       t = (low1+high1)/2; \
434       if (rp1[t] > col) high1 = t; \
435       else              low1  = t; \
436     } \
437       for (_i=low1; _i<high1; _i++) { \
438         if (rp1[_i] > col) break; \
439         if (rp1[_i] == col) { \
440           if (addv == ADD_VALUES) ap1[_i] += value;   \
441           else                    ap1[_i] = value; \
442           goto a_noinsert; \
443         } \
444       }  \
445       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
446       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
447       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
448       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
449       N = nrow1++ - 1; a->nz++; high1++; \
450       /* shift up all the later entries in this row */ \
451       for (ii=N; ii>=_i; ii--) { \
452         rp1[ii+1] = rp1[ii]; \
453         ap1[ii+1] = ap1[ii]; \
454       } \
455       rp1[_i] = col;  \
456       ap1[_i] = value;  \
457       A->nonzerostate++;\
458       a_noinsert: ; \
459       ailen[row] = nrow1; \
460 }
461 
462 
463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
464   { \
465     if (col <= lastcol2) low2 = 0;                        \
466     else high2 = nrow2;                                   \
467     lastcol2 = col;                                       \
468     while (high2-low2 > 5) {                              \
469       t = (low2+high2)/2;                                 \
470       if (rp2[t] > col) high2 = t;                        \
471       else             low2  = t;                         \
472     }                                                     \
473     for (_i=low2; _i<high2; _i++) {                       \
474       if (rp2[_i] > col) break;                           \
475       if (rp2[_i] == col) {                               \
476         if (addv == ADD_VALUES) ap2[_i] += value;         \
477         else                    ap2[_i] = value;          \
478         goto b_noinsert;                                  \
479       }                                                   \
480     }                                                     \
481     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
482     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
483     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
484     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
485     N = nrow2++ - 1; b->nz++; high2++;                    \
486     /* shift up all the later entries in this row */      \
487     for (ii=N; ii>=_i; ii--) {                            \
488       rp2[ii+1] = rp2[ii];                                \
489       ap2[ii+1] = ap2[ii];                                \
490     }                                                     \
491     rp2[_i] = col;                                        \
492     ap2[_i] = value;                                      \
493     B->nonzerostate++;                                    \
494     b_noinsert: ;                                         \
495     bilen[row] = nrow2;                                   \
496   }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
501 {
502   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
503   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
504   PetscErrorCode ierr;
505   PetscInt       l,*garray = mat->garray,diag;
506 
507   PetscFunctionBegin;
508   /* code only works for square matrices A */
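  /* the values v are assumed to cover the entire local row, ordered by increasing global
     column index and matching the row's existing nonzero pattern */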
509 
510   /* find size of row to the left of the diagonal part */
511   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
512   row  = row - diag;
513   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
514     if (garray[b->j[b->i[row]+l]] > diag) break;
515   }
516   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* diagonal part */
519   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
520 
521   /* right of diagonal part */
522   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatSetValues_MPIAIJ"
528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
529 {
530   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
531   PetscScalar    value;
532   PetscErrorCode ierr;
533   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
534   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
535   PetscBool      roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat        A                 = aij->A;
539   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
540   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
541   MatScalar  *aa               = a->a;
542   PetscBool  ignorezeroentries = a->ignorezeroentries;
543   Mat        B                 = aij->B;
544   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
545   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
546   MatScalar  *ba               = b->a;
547 
548   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
549   PetscInt  nonew;
550   MatScalar *ap1,*ap2;
551 
552   PetscFunctionBegin;
553   for (i=0; i<m; i++) {
554     if (im[i] < 0) continue;
555 #if defined(PETSC_USE_DEBUG)
556     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
557 #endif
558     if (im[i] >= rstart && im[i] < rend) {
559       row      = im[i] - rstart;
560       lastcol1 = -1;
561       rp1      = aj + ai[row];
562       ap1      = aa + ai[row];
563       rmax1    = aimax[row];
564       nrow1    = ailen[row];
565       low1     = 0;
566       high1    = nrow1;
567       lastcol2 = -1;
568       rp2      = bj + bi[row];
569       ap2      = ba + bi[row];
570       rmax2    = bimax[row];
571       nrow2    = bilen[row];
572       low2     = 0;
573       high2    = nrow2;
574 
575       for (j=0; j<n; j++) {
576         if (roworiented) value = v[i*n+j];
577         else             value = v[i+j*m];
578         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
579         if (in[j] >= cstart && in[j] < cend) {
580           col   = in[j] - cstart;
581           nonew = a->nonew;
582           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
583         } else if (in[j] < 0) continue;
584 #if defined(PETSC_USE_DEBUG)
585         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
586 #endif
587         else {
588           if (mat->was_assembled) {
589             if (!aij->colmap) {
590               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
591             }
592 #if defined(PETSC_USE_CTABLE)
593             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
594             col--;
595 #else
596             col = aij->colmap[in[j]] - 1;
597 #endif
598             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
599               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
600               col  =  in[j];
601               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
602               B     = aij->B;
603               b     = (Mat_SeqAIJ*)B->data;
604               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
605               rp2   = bj + bi[row];
606               ap2   = ba + bi[row];
607               rmax2 = bimax[row];
608               nrow2 = bilen[row];
609               low2  = 0;
610               high2 = nrow2;
611               bm    = aij->B->rmap->n;
612               ba    = b->a;
613             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614           } else col = in[j];
615           nonew = b->nonew;
616           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
617         }
618       }
619     } else {
620       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
621       if (!aij->donotstash) {
622         mat->assembled = PETSC_FALSE;
623         if (roworiented) {
624           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
625         } else {
626           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
627         }
628       }
629     }
630   }
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatGetValues_MPIAIJ"
636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
641   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
642 
643   PetscFunctionBegin;
644   for (i=0; i<m; i++) {
645     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
646     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
647     if (idxm[i] >= rstart && idxm[i] < rend) {
648       row = idxm[i] - rstart;
649       for (j=0; j<n; j++) {
650         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
651         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
652         if (idxn[j] >= cstart && idxn[j] < cend) {
653           col  = idxn[j] - cstart;
654           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
655         } else {
656           if (!aij->colmap) {
657             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
658           }
659 #if defined(PETSC_USE_CTABLE)
660           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
661           col--;
662 #else
663           col = aij->colmap[idxn[j]] - 1;
664 #endif
665           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
666           else {
667             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
668           }
669         }
670       }
671     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
681 {
682   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
683   PetscErrorCode ierr;
684   PetscInt       nstash,reallocs;
685   InsertMode     addv;
686 
687   PetscFunctionBegin;
688   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
689 
690   /* make sure all processes are either in INSERT_VALUES mode or in ADD_VALUES mode, not a mixture */
691   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
692   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
693   mat->insertmode = addv; /* in case this processor had no cache */
694 
695   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
696   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
697   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
698   PetscFunctionReturn(0);
699 }
700 
701 #undef __FUNCT__
702 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
703 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
704 {
705   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
706   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
707   PetscErrorCode ierr;
708   PetscMPIInt    n;
709   PetscInt       i,j,rstart,ncols,flg;
710   PetscInt       *row,*col;
711   PetscBool      other_disassembled;
712   PetscScalar    *val;
713   InsertMode     addv = mat->insertmode;
714 
715   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
716 
717   PetscFunctionBegin;
718   if (!aij->donotstash && !mat->nooffprocentries) {
719     while (1) {
720       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
721       if (!flg) break;
722 
723       for (i=0; i<n; ) {
724         /* Now identify the consecutive vals belonging to the same row */
725         for (j=i,rstart=row[j]; j<n; j++) {
726           if (row[j] != rstart) break;
727         }
728         if (j < n) ncols = j-i;
729         else       ncols = n-i;
730         /* Now assemble all these values with a single function call */
731         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
732 
733         i = j;
734       }
735     }
736     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
737   }
738   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
739   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
740 
741   /* determine if any processor has disassembled, if so we must
742      also disassemble ourselves, in order that we may reassemble. */
743   /*
744      if nonzero structure of submatrix B cannot change then we know that
745      no processor disassembled thus we can skip this stuff
746   */
747   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
748     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
749     if (mat->was_assembled && !other_disassembled) {
750       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
751     }
752   }
753   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
754     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
755   }
756   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
757   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
758   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
759 
760   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
761 
762   aij->rowvalues = 0;
763 
764   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
765   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
766 
767   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
768   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
769     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
770     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
771   }
772   PetscFunctionReturn(0);
773 }
774 
775 #undef __FUNCT__
776 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
777 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
778 {
779   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
780   PetscErrorCode ierr;
781 
782   PetscFunctionBegin;
783   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
784   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
785   PetscFunctionReturn(0);
786 }
787 
788 #undef __FUNCT__
789 #define __FUNCT__ "MatZeroRows_MPIAIJ"
790 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
791 {
792   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
793   PetscInt      *owners = A->rmap->range;
794   PetscInt       n      = A->rmap->n;
795   PetscSF        sf;
796   PetscInt      *lrows;
797   PetscSFNode   *rrows;
798   PetscInt       r, p = 0, len = 0;
799   PetscErrorCode ierr;
800 
801   PetscFunctionBegin;
802   /* Create SF where leaves are input rows and roots are owned rows */
803   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
804   for (r = 0; r < n; ++r) lrows[r] = -1;
805   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
806   for (r = 0; r < N; ++r) {
807     const PetscInt idx   = rows[r];
808     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
809     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
810       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
811     }
812     if (A->nooffproczerorows) {
813       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
814       lrows[len++] = idx - owners[p];
815     } else {
816       rrows[r].rank = p;
817       rrows[r].index = rows[r] - owners[p];
818     }
819   }
820   if (!A->nooffproczerorows) {
821     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
822     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
823     /* Collect flags for rows to be zeroed */
824     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
825     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
826     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
827     /* Compress and put in row numbers */
828     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
829   }
830   /* fix right hand side if needed */
831   if (x && b) {
832     const PetscScalar *xx;
833     PetscScalar       *bb;
834 
835     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
836     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
837     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
838     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
839     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
840   }
841   /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
842   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
843   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
844     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
845   } else if (diag != 0.0) {
846     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
847     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
848     for (r = 0; r < len; ++r) {
849       const PetscInt row = lrows[r] + A->rmap->rstart;
850       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
851     }
852     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
853     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
854   } else {
855     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
856   }
857   ierr = PetscFree(lrows);CHKERRQ(ierr);
858 
859   /* only change matrix nonzero state if pattern was allowed to be changed */
860   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
861     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
862     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
863   }
864   PetscFunctionReturn(0);
865 }
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
869 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
870 {
871   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
872   PetscErrorCode    ierr;
873   PetscMPIInt       n = A->rmap->n;
874   PetscInt          i,j,r,m,p = 0,len = 0;
875   PetscInt          *lrows,*owners = A->rmap->range;
876   PetscSFNode       *rrows;
877   PetscSF           sf;
878   const PetscScalar *xx;
879   PetscScalar       *bb,*mask;
880   Vec               xmask,lmask;
881   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
882   const PetscInt    *aj, *ii,*ridx;
883   PetscScalar       *aa;
884 
885   PetscFunctionBegin;
886   /* Create SF where leaves are input rows and roots are owned rows */
887   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
888   for (r = 0; r < n; ++r) lrows[r] = -1;
889   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
890   for (r = 0; r < N; ++r) {
891     const PetscInt idx   = rows[r];
892     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
893     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
894       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
895     }
896     rrows[r].rank  = p;
897     rrows[r].index = rows[r] - owners[p];
898   }
899   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
900   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
901   /* Collect flags for rows to be zeroed */
902   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
903   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
904   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
905   /* Compress and put in row numbers */
906   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
907   /* zero diagonal part of matrix */
908   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
909   /* handle off diagonal part of matrix */
910   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
911   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
912   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
913   for (i=0; i<len; i++) bb[lrows[i]] = 1;
914   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
915   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
917   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
918   if (x) {
919     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
920     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
921     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
922     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
923   }
924   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
925   /* remove zeroed rows of off diagonal matrix */
926   ii = aij->i;
927   for (i=0; i<len; i++) {
928     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
929   }
930   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
931   if (aij->compressedrow.use) {
932     m    = aij->compressedrow.nrows;
933     ii   = aij->compressedrow.i;
934     ridx = aij->compressedrow.rindex;
935     for (i=0; i<m; i++) {
936       n  = ii[i+1] - ii[i];
937       aj = aij->j + ii[i];
938       aa = aij->a + ii[i];
939 
940       for (j=0; j<n; j++) {
941         if (PetscAbsScalar(mask[*aj])) {
942           if (b) bb[*ridx] -= *aa*xx[*aj];
943           *aa = 0.0;
944         }
945         aa++;
946         aj++;
947       }
948       ridx++;
949     }
950   } else { /* do not use compressed row format */
951     m = l->B->rmap->n;
952     for (i=0; i<m; i++) {
953       n  = ii[i+1] - ii[i];
954       aj = aij->j + ii[i];
955       aa = aij->a + ii[i];
956       for (j=0; j<n; j++) {
957         if (PetscAbsScalar(mask[*aj])) {
958           if (b) bb[i] -= *aa*xx[*aj];
959           *aa = 0.0;
960         }
961         aa++;
962         aj++;
963       }
964     }
965   }
966   if (x) {
967     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
968     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
969   }
970   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
971   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
972   ierr = PetscFree(lrows);CHKERRQ(ierr);
973 
974   /* only change matrix nonzero state if pattern was allowed to be changed */
975   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
976     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
977     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
978   }
979   PetscFunctionReturn(0);
980 }
981 
982 #undef __FUNCT__
983 #define __FUNCT__ "MatMult_MPIAIJ"
984 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
985 {
986   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
987   PetscErrorCode ierr;
988   PetscInt       nt;
989 
990   PetscFunctionBegin;
991   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
992   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
993   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
994   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
995   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
996   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
997   PetscFunctionReturn(0);
998 }
999 
1000 #undef __FUNCT__
1001 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1002 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1003 {
1004   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1005   PetscErrorCode ierr;
1006 
1007   PetscFunctionBegin;
1008   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 #undef __FUNCT__
1013 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1014 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1015 {
1016   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1017   PetscErrorCode ierr;
1018 
1019   PetscFunctionBegin;
1020   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1021   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1022   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1023   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1029 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1030 {
1031   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1032   PetscErrorCode ierr;
1033   PetscBool      merged;
1034 
1035   PetscFunctionBegin;
1036   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1037   /* do nondiagonal part */
1038   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1039   if (!merged) {
1040     /* send it on its way */
1041     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1042     /* do local part */
1043     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1044     /* receive remote parts: note this assumes the values are not actually */
1045     /* added in yy until the next line */
1046     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1047   } else {
1048     /* do local part */
1049     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1050     /* send it on its way */
1051     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1052     /* values actually were received in the Begin() but we need to call this nop */
1053     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1054   }
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 #undef __FUNCT__
1059 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1060 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1061 {
1062   MPI_Comm       comm;
1063   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1064   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1065   IS             Me,Notme;
1066   PetscErrorCode ierr;
1067   PetscInt       M,N,first,last,*notme,i;
1068   PetscMPIInt    size;
1069 
1070   PetscFunctionBegin;
1071   /* Easy test: the diagonal blocks must be transposes of each other */
1072   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1073   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1074   if (!*f) PetscFunctionReturn(0);
1075   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1076   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1077   if (size == 1) PetscFunctionReturn(0);
1078 
1079   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1080   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1081   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1082   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1083   for (i=0; i<first; i++) notme[i] = i;
1084   for (i=last; i<M; i++) notme[i-last+first] = i;
1085   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1086   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1087   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1088   Aoff = Aoffs[0];
1089   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1090   Boff = Boffs[0];
1091   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1092   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1093   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1094   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1095   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1096   ierr = PetscFree(notme);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1102 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106 
1107   PetscFunctionBegin;
1108   /* do nondiagonal part */
1109   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1110   /* send it on its way */
1111   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1112   /* do local part */
1113   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1114   /* receive remote parts */
1115   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 /*
1120   This only works correctly for square matrices where the subblock A->A is the
1121    diagonal block
1122 */
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1125 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1126 {
1127   PetscErrorCode ierr;
1128   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1129 
1130   PetscFunctionBegin;
1131   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1132   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1133   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNCT__
1138 #define __FUNCT__ "MatScale_MPIAIJ"
1139 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1140 {
1141   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1142   PetscErrorCode ierr;
1143 
1144   PetscFunctionBegin;
1145   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1146   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatDestroy_MPIAIJ"
1152 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1153 {
1154   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1155   PetscErrorCode ierr;
1156 
1157   PetscFunctionBegin;
1158 #if defined(PETSC_USE_LOG)
1159   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1160 #endif
1161   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1162   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1163   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1164   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1165 #if defined(PETSC_USE_CTABLE)
1166   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1167 #else
1168   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1169 #endif
1170   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1171   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1172   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1173   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1174   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1175   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1176 
1177   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1179   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1181   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1182   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1186 #if defined(PETSC_HAVE_ELEMENTAL)
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1188 #endif
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 #undef __FUNCT__
1193 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1194 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1195 {
1196   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1197   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1198   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1199   PetscErrorCode ierr;
1200   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1201   int            fd;
1202   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1203   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1204   PetscScalar    *column_values;
1205   PetscInt       message_count,flowcontrolcount;
1206   FILE           *file;
1207 
1208   PetscFunctionBegin;
1209   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1210   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1211   nz   = A->nz + B->nz;
1212   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1213   if (!rank) {
1214     header[0] = MAT_FILE_CLASSID;
1215     header[1] = mat->rmap->N;
1216     header[2] = mat->cmap->N;
1217 
1218     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1219     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1220     /* get largest number of rows any processor has */
1221     rlen  = mat->rmap->n;
1222     range = mat->rmap->range;
1223     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1224   } else {
1225     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1226     rlen = mat->rmap->n;
1227   }
1228 
1229   /* load up the local row counts */
1230   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1231   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1232 
1233   /* store the row lengths to the file */
1234   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1235   if (!rank) {
1236     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1237     for (i=1; i<size; i++) {
1238       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1239       rlen = range[i+1] - range[i];
1240       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1241       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1242     }
1243     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1244   } else {
1245     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1246     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1247     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1248   }
1249   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1250 
1251   /* load up the local column indices */
1252   nzmax = nz; /* process 0 needs as much space as the process with the most nonzeros */
1253   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1254   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1255   cnt   = 0;
1256   for (i=0; i<mat->rmap->n; i++) {
1257     for (j=B->i[i]; j<B->i[i+1]; j++) {
1258       if ((col = garray[B->j[j]]) > cstart) break;
1259       column_indices[cnt++] = col;
1260     }
1261     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1262     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1263   }
1264   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1265 
1266   /* store the column indices to the file */
1267   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1268   if (!rank) {
1269     MPI_Status status;
1270     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1271     for (i=1; i<size; i++) {
1272       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1273       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1274       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1275       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1276       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1277     }
1278     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1279   } else {
1280     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1281     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1282     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1283     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1284   }
1285   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1286 
1287   /* load up the local column values */
1288   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1289   cnt  = 0;
1290   for (i=0; i<mat->rmap->n; i++) {
1291     for (j=B->i[i]; j<B->i[i+1]; j++) {
1292       if (garray[B->j[j]] > cstart) break;
1293       column_values[cnt++] = B->a[j];
1294     }
1295     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1296     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1297   }
1298   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1299 
1300   /* store the column values to the file */
1301   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1302   if (!rank) {
1303     MPI_Status status;
1304     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1305     for (i=1; i<size; i++) {
1306       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1307       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1308       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1309       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1310       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1311     }
1312     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1313   } else {
1314     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1315     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1316     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1317     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1318   }
1319   ierr = PetscFree(column_values);CHKERRQ(ierr);
1320 
1321   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1322   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 #include <petscdraw.h>
1327 #undef __FUNCT__
1328 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1329 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1330 {
1331   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1332   PetscErrorCode    ierr;
1333   PetscMPIInt       rank = aij->rank,size = aij->size;
1334   PetscBool         isdraw,iascii,isbinary;
1335   PetscViewer       sviewer;
1336   PetscViewerFormat format;
1337 
1338   PetscFunctionBegin;
1339   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1340   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1341   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1342   if (iascii) {
1343     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1344     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1345       MatInfo   info;
1346       PetscBool inodes;
1347 
1348       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1349       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1350       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1351       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1352       if (!inodes) {
1353         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1354                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1355       } else {
1356         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1357                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1358       }
1359       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1360       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1361       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1362       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1363       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1364       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1365       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1366       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1367       PetscFunctionReturn(0);
1368     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1369       PetscInt inodecount,inodelimit,*inodes;
1370       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1371       if (inodes) {
1372         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1373       } else {
1374         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1375       }
1376       PetscFunctionReturn(0);
1377     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1378       PetscFunctionReturn(0);
1379     }
1380   } else if (isbinary) {
1381     if (size == 1) {
1382       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1383       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1384     } else {
1385       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1386     }
1387     PetscFunctionReturn(0);
1388   } else if (isdraw) {
1389     PetscDraw draw;
1390     PetscBool isnull;
1391     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1392     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1393   }
1394 
1395   {
1396     /* assemble the entire matrix onto first processor. */
1397     Mat        A;
1398     Mat_SeqAIJ *Aloc;
1399     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1400     MatScalar  *a;
1401 
1402     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1403     if (!rank) {
1404       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1405     } else {
1406       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1407     }
1408     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1409     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1410     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1411     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1412     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1413 
1414     /* copy over the A part */
1415     Aloc = (Mat_SeqAIJ*)aij->A->data;
1416     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1417     row  = mat->rmap->rstart;
1418     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1419     for (i=0; i<m; i++) {
1420       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1421       row++;
1422       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1423     }
1424     aj = Aloc->j;
1425     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1426 
1427     /* copy over the B part */
1428     Aloc = (Mat_SeqAIJ*)aij->B->data;
1429     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1430     row  = mat->rmap->rstart;
1431     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1432     ct   = cols;
1433     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1434     for (i=0; i<m; i++) {
1435       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1436       row++;
1437       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1438     }
1439     ierr = PetscFree(ct);CHKERRQ(ierr);
1440     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1441     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1442     /*
1443        Every process has to participate here so the matrix gets drawn, since the graphics waits are
1444        synchronized across all processes that share the PetscDraw object
1445     */
1446     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1447     if (!rank) {
1448       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1449       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1450     }
1451     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1452     ierr = MatDestroy(&A);CHKERRQ(ierr);
1453   }
1454   PetscFunctionReturn(0);
1455 }
1456 
1457 #undef __FUNCT__
1458 #define __FUNCT__ "MatView_MPIAIJ"
1459 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1460 {
1461   PetscErrorCode ierr;
1462   PetscBool      iascii,isdraw,issocket,isbinary;
1463 
1464   PetscFunctionBegin;
1465   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1466   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1467   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1468   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1469   if (iascii || isdraw || isbinary || issocket) {
1470     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1471   }
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 #undef __FUNCT__
1476 #define __FUNCT__ "MatSOR_MPIAIJ"
1477 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1478 {
1479   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1480   PetscErrorCode ierr;
1481   Vec            bb1 = 0;
1482   PetscBool      hasop;
1483 
1484   PetscFunctionBegin;
1485   if (flag == SOR_APPLY_UPPER) {
1486     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1487     PetscFunctionReturn(0);
1488   }
1489 
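       /* A work vector bb1 is needed whenever the off-process coupling must be folded into the
          right-hand side: more than one iteration, a nonzero initial guess
          (~flag & SOR_ZERO_INITIAL_GUESS is nonzero exactly when that bit is not set), or
          Eisenstat's trick. For the local sweeps below bb1 holds bb - B*x. */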
1490   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1491     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1492   }
1493 
1494   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1495     if (flag & SOR_ZERO_INITIAL_GUESS) {
1496       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1497       its--;
1498     }
1499 
1500     while (its--) {
1501       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1502       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1503 
1504       /* update rhs: bb1 = bb - B*x */
1505       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1506       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1507 
1508       /* local sweep */
1509       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1510     }
1511   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1512     if (flag & SOR_ZERO_INITIAL_GUESS) {
1513       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1514       its--;
1515     }
1516     while (its--) {
1517       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1518       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1519 
1520       /* update rhs: bb1 = bb - B*x */
1521       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1522       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1523 
1524       /* local sweep */
1525       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1526     }
1527   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1528     if (flag & SOR_ZERO_INITIAL_GUESS) {
1529       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1530       its--;
1531     }
1532     while (its--) {
1533       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1534       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1535 
1536       /* update rhs: bb1 = bb - B*x */
1537       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1538       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1539 
1540       /* local sweep */
1541       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1542     }
1543   } else if (flag & SOR_EISENSTAT) {
1544     Vec xx1;
1545 
1546     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1547     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1548 
1549     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1550     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1551     if (!mat->diag) {
1552       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1553       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1554     }
1555     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1556     if (hasop) {
1557       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1558     } else {
1559       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1560     }
1561     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1562 
1563     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1564 
1565     /* local sweep */
1566     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1567     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1568     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1569   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1570 
1571   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1572   PetscFunctionReturn(0);
1573 }
1574 
1575 #undef __FUNCT__
1576 #define __FUNCT__ "MatPermute_MPIAIJ"
1577 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1578 {
1579   Mat            aA,aB,Aperm;
1580   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1581   PetscScalar    *aa,*ba;
1582   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1583   PetscSF        rowsf,sf;
1584   IS             parcolp = NULL;
1585   PetscBool      done;
1586   PetscErrorCode ierr;
1587 
1588   PetscFunctionBegin;
1589   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1590   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1591   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1592   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1593 
1594   /* Invert row permutation to find out where my rows should go */
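       /* Each leaf of rowsf points at the original row requested by the permutation (rwant); reducing
          the owned global row numbers (work) over this graph leaves rdest[i] holding the global row of
          the permuted matrix where local row i must be placed. */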
1595   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1596   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1597   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1598   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1599   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1600   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1601 
1602   /* Invert column permutation to find out where my columns should go */
1603   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1604   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1605   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1606   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1607   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1608   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1609   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1610 
1611   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1612   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1613   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1614 
1615   /* Find out where my gcols should go */
1616   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1617   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1618   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1619   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1620   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1621   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1622   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1623   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1624 
1625   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1626   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1627   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
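       /* For each original local row, count how many of its entries land in the diagonal block of the
          permuted matrix (destination row and column owned by the same process) versus the off-diagonal
          block; these counts are then broadcast over rowsf to the destination rows (tdnnz/tonnz) and
          used for preallocation. */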
1628   for (i=0; i<m; i++) {
1629     PetscInt row = rdest[i],rowner;
1630     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1631     for (j=ai[i]; j<ai[i+1]; j++) {
1632       PetscInt cowner,col = cdest[aj[j]];
1633       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1634       if (rowner == cowner) dnnz[i]++;
1635       else onnz[i]++;
1636     }
1637     for (j=bi[i]; j<bi[i+1]; j++) {
1638       PetscInt cowner,col = gcdest[bj[j]];
1639       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1640       if (rowner == cowner) dnnz[i]++;
1641       else onnz[i]++;
1642     }
1643   }
1644   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1645   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1646   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1647   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1648   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1649 
1650   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1651   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1652   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1653   for (i=0; i<m; i++) {
1654     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1655     PetscInt j0,rowlen;
1656     rowlen = ai[i+1] - ai[i];
1657     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m, the length of the scratch arrays, so insert the row in batches of at most m values */
1658       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1659       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1660     }
1661     rowlen = bi[i+1] - bi[i];
1662     for (j0=j=0; j<rowlen; j0=j) {
1663       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1664       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1665     }
1666   }
1667   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1668   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1669   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1670   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1671   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1672   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1673   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1674   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1675   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1676   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1677   *B = Aperm;
1678   PetscFunctionReturn(0);
1679 }
1680 
1681 #undef __FUNCT__
1682 #define __FUNCT__ "MatGetGhosts_MPIAIJ"
1683 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1684 {
1685   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1686   PetscErrorCode ierr;
1687 
1688   PetscFunctionBegin;
1689   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1690   if (ghosts) *ghosts = aij->garray;
1691   PetscFunctionReturn(0);
1692 }
1693 
1694 #undef __FUNCT__
1695 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1696 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1697 {
1698   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1699   Mat            A    = mat->A,B = mat->B;
1700   PetscErrorCode ierr;
1701   PetscReal      isend[5],irecv[5];
1702 
1703   PetscFunctionBegin;
1704   info->block_size = 1.0;
1705   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1706 
1707   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1708   isend[3] = info->memory;  isend[4] = info->mallocs;
1709 
1710   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1711 
1712   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1713   isend[3] += info->memory;  isend[4] += info->mallocs;
1714   if (flag == MAT_LOCAL) {
1715     info->nz_used      = isend[0];
1716     info->nz_allocated = isend[1];
1717     info->nz_unneeded  = isend[2];
1718     info->memory       = isend[3];
1719     info->mallocs      = isend[4];
1720   } else if (flag == MAT_GLOBAL_MAX) {
1721     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1722 
1723     info->nz_used      = irecv[0];
1724     info->nz_allocated = irecv[1];
1725     info->nz_unneeded  = irecv[2];
1726     info->memory       = irecv[3];
1727     info->mallocs      = irecv[4];
1728   } else if (flag == MAT_GLOBAL_SUM) {
1729     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1730 
1731     info->nz_used      = irecv[0];
1732     info->nz_allocated = irecv[1];
1733     info->nz_unneeded  = irecv[2];
1734     info->memory       = irecv[3];
1735     info->mallocs      = irecv[4];
1736   }
1737   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1738   info->fill_ratio_needed = 0;
1739   info->factor_mallocs    = 0;
1740   PetscFunctionReturn(0);
1741 }
1742 
1743 #undef __FUNCT__
1744 #define __FUNCT__ "MatSetOption_MPIAIJ"
1745 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1746 {
1747   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1748   PetscErrorCode ierr;
1749 
1750   PetscFunctionBegin;
1751   switch (op) {
1752   case MAT_NEW_NONZERO_LOCATIONS:
1753   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1754   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1755   case MAT_KEEP_NONZERO_PATTERN:
1756   case MAT_NEW_NONZERO_LOCATION_ERR:
1757   case MAT_USE_INODES:
1758   case MAT_IGNORE_ZERO_ENTRIES:
1759     MatCheckPreallocated(A,1);
1760     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1761     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1762     break;
1763   case MAT_ROW_ORIENTED:
1764     a->roworiented = flg;
1765 
1766     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1767     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1768     break;
1769   case MAT_NEW_DIAGONALS:
1770     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1771     break;
1772   case MAT_IGNORE_OFF_PROC_ENTRIES:
1773     a->donotstash = flg;
1774     break;
1775   case MAT_SPD:
1776     A->spd_set = PETSC_TRUE;
1777     A->spd     = flg;
1778     if (flg) {
1779       A->symmetric                  = PETSC_TRUE;
1780       A->structurally_symmetric     = PETSC_TRUE;
1781       A->symmetric_set              = PETSC_TRUE;
1782       A->structurally_symmetric_set = PETSC_TRUE;
1783     }
1784     break;
1785   case MAT_SYMMETRIC:
1786     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1787     break;
1788   case MAT_STRUCTURALLY_SYMMETRIC:
1789     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1790     break;
1791   case MAT_HERMITIAN:
1792     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1793     break;
1794   case MAT_SYMMETRY_ETERNAL:
1795     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1796     break;
1797   default:
1798     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1799   }
1800   PetscFunctionReturn(0);
1801 }
1802 
1803 #undef __FUNCT__
1804 #define __FUNCT__ "MatGetRow_MPIAIJ"
1805 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1806 {
1807   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1808   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1809   PetscErrorCode ierr;
1810   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1811   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1812   PetscInt       *cmap,*idx_p;
1813 
1814   PetscFunctionBegin;
1815   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1816   mat->getrowactive = PETSC_TRUE;
1817 
1818   if (!mat->rowvalues && (idx || v)) {
1819     /*
1820         allocate enough space to hold information from the longest row.
1821     */
1822     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1823     PetscInt   max = 1,tmp;
1824     for (i=0; i<matin->rmap->n; i++) {
1825       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1826       if (max < tmp) max = tmp;
1827     }
1828     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1829   }
1830 
1831   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1832   lrow = row - rstart;
1833 
1834   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1835   if (!v)   {pvA = 0; pvB = 0;}
1836   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1837   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1838   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1839   nztot = nzA + nzB;
1840 
1841   cmap = mat->garray;
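       /* Merge the two local rows into ascending global column order: B entries whose global column
          lies left of the diagonal block come first, then all A entries, then the remaining B entries;
          imark records how many B entries fall left of the diagonal block. */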
1842   if (v  || idx) {
1843     if (nztot) {
1844       /* Sort by increasing column numbers, assuming A and B already sorted */
1845       PetscInt imark = -1;
1846       if (v) {
1847         *v = v_p = mat->rowvalues;
1848         for (i=0; i<nzB; i++) {
1849           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1850           else break;
1851         }
1852         imark = i;
1853         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1854         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1855       }
1856       if (idx) {
1857         *idx = idx_p = mat->rowindices;
1858         if (imark > -1) {
1859           for (i=0; i<imark; i++) {
1860             idx_p[i] = cmap[cworkB[i]];
1861           }
1862         } else {
1863           for (i=0; i<nzB; i++) {
1864             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1865             else break;
1866           }
1867           imark = i;
1868         }
1869         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1870         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1871       }
1872     } else {
1873       if (idx) *idx = 0;
1874       if (v)   *v   = 0;
1875     }
1876   }
1877   *nz  = nztot;
1878   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1879   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1880   PetscFunctionReturn(0);
1881 }
1882 
1883 #undef __FUNCT__
1884 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1885 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1886 {
1887   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1888 
1889   PetscFunctionBegin;
1890   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1891   aij->getrowactive = PETSC_FALSE;
1892   PetscFunctionReturn(0);
1893 }
1894 
1895 #undef __FUNCT__
1896 #define __FUNCT__ "MatNorm_MPIAIJ"
1897 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1898 {
1899   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1900   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1901   PetscErrorCode ierr;
1902   PetscInt       i,j,cstart = mat->cmap->rstart;
1903   PetscReal      sum = 0.0;
1904   MatScalar      *v;
1905 
1906   PetscFunctionBegin;
1907   if (aij->size == 1) {
1908     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1909   } else {
1910     if (type == NORM_FROBENIUS) {
1911       v = amat->a;
1912       for (i=0; i<amat->nz; i++) {
1913         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1914       }
1915       v = bmat->a;
1916       for (i=0; i<bmat->nz; i++) {
1917         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1918       }
1919       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1920       *norm = PetscSqrtReal(*norm);
1921     } else if (type == NORM_1) { /* max column norm */
1922       PetscReal *tmp,*tmp2;
1923       PetscInt  *jj,*garray = aij->garray;
1924       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1925       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1926       *norm = 0.0;
1927       v     = amat->a; jj = amat->j;
1928       for (j=0; j<amat->nz; j++) {
1929         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1930       }
1931       v = bmat->a; jj = bmat->j;
1932       for (j=0; j<bmat->nz; j++) {
1933         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1934       }
1935       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1936       for (j=0; j<mat->cmap->N; j++) {
1937         if (tmp2[j] > *norm) *norm = tmp2[j];
1938       }
1939       ierr = PetscFree(tmp);CHKERRQ(ierr);
1940       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1941     } else if (type == NORM_INFINITY) { /* max row norm */
1942       PetscReal ntemp = 0.0;
1943       for (j=0; j<aij->A->rmap->n; j++) {
1944         v   = amat->a + amat->i[j];
1945         sum = 0.0;
1946         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1947           sum += PetscAbsScalar(*v); v++;
1948         }
1949         v = bmat->a + bmat->i[j];
1950         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1951           sum += PetscAbsScalar(*v); v++;
1952         }
1953         if (sum > ntemp) ntemp = sum;
1954       }
1955       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1956     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1957   }
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 #undef __FUNCT__
1962 #define __FUNCT__ "MatTranspose_MPIAIJ"
1963 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1964 {
1965   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1966   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1967   PetscErrorCode ierr;
1968   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1969   PetscInt       cstart = A->cmap->rstart,ncol;
1970   Mat            B;
1971   MatScalar      *array;
1972 
1973   PetscFunctionBegin;
1974   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1975 
1976   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1977   ai = Aloc->i; aj = Aloc->j;
1978   bi = Bloc->i; bj = Bloc->j;
1979   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1980     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1981     PetscSFNode          *oloc;
1982     PETSC_UNUSED PetscSF sf;
1983 
1984     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1985     /* compute d_nnz for preallocation */
1986     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1987     for (i=0; i<ai[ma]; i++) {
1988       d_nnz[aj[i]]++;
1989       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1990     }
1991     /* compute local off-diagonal contributions */
1992     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1993     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1994     /* map those to global */
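         /* the reduce below sums the per-(compressed)-column counts onto the processes that own those
            global columns of A; each such column becomes a locally owned row of the transpose, so
            o_nnz[] ends up with the off-diagonal preallocation for that transposed row */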
1995     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1996     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1997     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1998     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1999     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2000     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2001     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2002 
2003     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2004     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2005     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2006     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2007     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2008     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2009   } else {
2010     B    = *matout;
2011     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2012     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2013   }
2014 
2015   /* copy over the A part */
2016   array = Aloc->a;
2017   row   = A->rmap->rstart;
2018   for (i=0; i<ma; i++) {
2019     ncol = ai[i+1]-ai[i];
2020     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2021     row++;
2022     array += ncol; aj += ncol;
2023   }
2024   aj = Aloc->j;
2025   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
2026 
2027   /* copy over the B part */
2028   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2029   array = Bloc->a;
2030   row   = A->rmap->rstart;
2031   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2032   cols_tmp = cols;
2033   for (i=0; i<mb; i++) {
2034     ncol = bi[i+1]-bi[i];
2035     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2036     row++;
2037     array += ncol; cols_tmp += ncol;
2038   }
2039   ierr = PetscFree(cols);CHKERRQ(ierr);
2040 
2041   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2042   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2043   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2044     *matout = B;
2045   } else {
2046     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2047   }
2048   PetscFunctionReturn(0);
2049 }
2050 
2051 #undef __FUNCT__
2052 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2053 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2054 {
2055   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2056   Mat            a    = aij->A,b = aij->B;
2057   PetscErrorCode ierr;
2058   PetscInt       s1,s2,s3;
2059 
2060   PetscFunctionBegin;
2061   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2062   if (rr) {
2063     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2064     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2065     /* Overlap communication with computation. */
2066     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2067   }
2068   if (ll) {
2069     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2070     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2071     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2072   }
2073   /* scale the diagonal block */
2074   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2075 
2076   if (rr) {
2077     /* Do a scatter end and then right scale the off-diagonal block */
2078     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2079     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2080   }
2081   PetscFunctionReturn(0);
2082 }
2083 
2084 #undef __FUNCT__
2085 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2086 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2087 {
2088   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2089   PetscErrorCode ierr;
2090 
2091   PetscFunctionBegin;
2092   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2093   PetscFunctionReturn(0);
2094 }
2095 
2096 #undef __FUNCT__
2097 #define __FUNCT__ "MatEqual_MPIAIJ"
2098 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2099 {
2100   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2101   Mat            a,b,c,d;
2102   PetscBool      flg;
2103   PetscErrorCode ierr;
2104 
2105   PetscFunctionBegin;
2106   a = matA->A; b = matA->B;
2107   c = matB->A; d = matB->B;
2108 
2109   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2110   if (flg) {
2111     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2112   }
2113   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2114   PetscFunctionReturn(0);
2115 }
2116 
2117 #undef __FUNCT__
2118 #define __FUNCT__ "MatCopy_MPIAIJ"
2119 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2120 {
2121   PetscErrorCode ierr;
2122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2123   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2124 
2125   PetscFunctionBegin;
2126   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2127   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2128     /* Because of the column compression in the off-process part of the matrix a->B,
2129        the number of columns in a->B and b->B may differ, hence we cannot call
2130        MatCopy() directly on the two parts. If need be, a copy more efficient than
2131        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2132        and then copying the submatrices */
2133     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2134   } else {
2135     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2136     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2137   }
2138   PetscFunctionReturn(0);
2139 }
2140 
2141 #undef __FUNCT__
2142 #define __FUNCT__ "MatSetUp_MPIAIJ"
2143 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2144 {
2145   PetscErrorCode ierr;
2146 
2147   PetscFunctionBegin;
2148   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2149   PetscFunctionReturn(0);
2150 }
2151 
2152 /*
2153    Computes the number of nonzeros per row needed for preallocation when X and Y
2154    have different nonzero structure.
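        For example, if row i of X has global columns {1,4} and row i of Y has {2,4,7},
        the merged pattern is {1,2,4,7}, so nnz[i] = 4.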
2155 */
2156 #undef __FUNCT__
2157 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2158 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2159 {
2160   PetscInt       i,j,k,nzx,nzy;
2161 
2162   PetscFunctionBegin;
2163   /* Set the number of nonzeros in the new matrix */
2164   for (i=0; i<m; i++) {
2165     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2166     nzx = xi[i+1] - xi[i];
2167     nzy = yi[i+1] - yi[i];
2168     nnz[i] = 0;
2169     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2170       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2171       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2172       nnz[i]++;
2173     }
2174     for (; k<nzy; k++) nnz[i]++;
2175   }
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2180 #undef __FUNCT__
2181 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2182 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2183 {
2184   PetscErrorCode ierr;
2185   PetscInt       m = Y->rmap->N;
2186   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2187   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2188 
2189   PetscFunctionBegin;
2190   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2191   PetscFunctionReturn(0);
2192 }
2193 
2194 #undef __FUNCT__
2195 #define __FUNCT__ "MatAXPY_MPIAIJ"
2196 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2197 {
2198   PetscErrorCode ierr;
2199   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2200   PetscBLASInt   bnz,one=1;
2201   Mat_SeqAIJ     *x,*y;
2202 
2203   PetscFunctionBegin;
2204   if (str == SAME_NONZERO_PATTERN) {
2205     PetscScalar alpha = a;
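         /* X and Y have identical nonzero patterns, so the stored value arrays of the diagonal and
            off-diagonal blocks are conformal and Y += a*X reduces to two BLAS axpy calls */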
2206     x    = (Mat_SeqAIJ*)xx->A->data;
2207     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2208     y    = (Mat_SeqAIJ*)yy->A->data;
2209     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2210     x    = (Mat_SeqAIJ*)xx->B->data;
2211     y    = (Mat_SeqAIJ*)yy->B->data;
2212     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2213     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2214     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2215   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */
2216     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2217   } else {
2218     Mat      B;
2219     PetscInt *nnz_d,*nnz_o;
2220     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2221     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2222     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2223     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2224     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2225     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2226     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2227     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2228     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2229     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2230     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2231     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2232     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2233     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2234   }
2235   PetscFunctionReturn(0);
2236 }
2237 
2238 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2239 
2240 #undef __FUNCT__
2241 #define __FUNCT__ "MatConjugate_MPIAIJ"
2242 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2243 {
2244 #if defined(PETSC_USE_COMPLEX)
2245   PetscErrorCode ierr;
2246   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2247 
2248   PetscFunctionBegin;
2249   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2250   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2251 #else
2252   PetscFunctionBegin;
2253 #endif
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatRealPart_MPIAIJ"
2259 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2260 {
2261   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2262   PetscErrorCode ierr;
2263 
2264   PetscFunctionBegin;
2265   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2266   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2267   PetscFunctionReturn(0);
2268 }
2269 
2270 #undef __FUNCT__
2271 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2272 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2273 {
2274   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2275   PetscErrorCode ierr;
2276 
2277   PetscFunctionBegin;
2278   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2279   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2280   PetscFunctionReturn(0);
2281 }
2282 
2283 #undef __FUNCT__
2284 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2285 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2286 {
2287   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2288   PetscErrorCode ierr;
2289   PetscInt       i,*idxb = 0;
2290   PetscScalar    *va,*vb;
2291   Vec            vtmp;
2292 
2293   PetscFunctionBegin;
2294   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2295   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2296   if (idx) {
2297     for (i=0; i<A->rmap->n; i++) {
2298       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2299     }
2300   }
2301 
2302   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2303   if (idx) {
2304     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2305   }
2306   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2307   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2308 
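       /* keep, for each local row, the larger of the diagonal-block and off-diagonal-block maxima,
          translating a winning off-diagonal column index back to global numbering via garray */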
2309   for (i=0; i<A->rmap->n; i++) {
2310     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2311       va[i] = vb[i];
2312       if (idx) idx[i] = a->garray[idxb[i]];
2313     }
2314   }
2315 
2316   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2317   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2318   ierr = PetscFree(idxb);CHKERRQ(ierr);
2319   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2320   PetscFunctionReturn(0);
2321 }
2322 
2323 #undef __FUNCT__
2324 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2325 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2326 {
2327   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2328   PetscErrorCode ierr;
2329   PetscInt       i,*idxb = 0;
2330   PetscScalar    *va,*vb;
2331   Vec            vtmp;
2332 
2333   PetscFunctionBegin;
2334   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2335   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2336   if (idx) {
2337     for (i=0; i<A->rmap->n; i++) {
2338       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2339     }
2340   }
2341 
2342   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2343   if (idx) {
2344     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2345   }
2346   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2347   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2348 
2349   for (i=0; i<A->rmap->n; i++) {
2350     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2351       va[i] = vb[i];
2352       if (idx) idx[i] = a->garray[idxb[i]];
2353     }
2354   }
2355 
2356   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2357   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2358   ierr = PetscFree(idxb);CHKERRQ(ierr);
2359   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2360   PetscFunctionReturn(0);
2361 }
2362 
2363 #undef __FUNCT__
2364 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2365 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2366 {
2367   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2368   PetscInt       n      = A->rmap->n;
2369   PetscInt       cstart = A->cmap->rstart;
2370   PetscInt       *cmap  = mat->garray;
2371   PetscInt       *diagIdx, *offdiagIdx;
2372   Vec            diagV, offdiagV;
2373   PetscScalar    *a, *diagA, *offdiagA;
2374   PetscInt       r;
2375   PetscErrorCode ierr;
2376 
2377   PetscFunctionBegin;
2378   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2379   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2380   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2381   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2382   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2383   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2384   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2385   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2386   for (r = 0; r < n; ++r) {
2387     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2388       a[r]   = diagA[r];
2389       idx[r] = cstart + diagIdx[r];
2390     } else {
2391       a[r]   = offdiagA[r];
2392       idx[r] = cmap[offdiagIdx[r]];
2393     }
2394   }
2395   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2396   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2397   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2398   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2399   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2400   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2401   PetscFunctionReturn(0);
2402 }
2403 
2404 #undef __FUNCT__
2405 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2406 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2407 {
2408   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2409   PetscInt       n      = A->rmap->n;
2410   PetscInt       cstart = A->cmap->rstart;
2411   PetscInt       *cmap  = mat->garray;
2412   PetscInt       *diagIdx, *offdiagIdx;
2413   Vec            diagV, offdiagV;
2414   PetscScalar    *a, *diagA, *offdiagA;
2415   PetscInt       r;
2416   PetscErrorCode ierr;
2417 
2418   PetscFunctionBegin;
2419   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2420   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2421   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2422   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2423   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2424   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2425   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2426   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2427   for (r = 0; r < n; ++r) {
2428     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2429       a[r]   = diagA[r];
2430       idx[r] = cstart + diagIdx[r];
2431     } else {
2432       a[r]   = offdiagA[r];
2433       idx[r] = cmap[offdiagIdx[r]];
2434     }
2435   }
2436   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2437   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2438   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2439   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2440   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2441   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2442   PetscFunctionReturn(0);
2443 }
2444 
2445 #undef __FUNCT__
2446 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2447 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2448 {
2449   PetscErrorCode ierr;
2450   Mat            *dummy;
2451 
2452   PetscFunctionBegin;
2453   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2454   *newmat = *dummy;
2455   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2456   PetscFunctionReturn(0);
2457 }
2458 
2459 #undef __FUNCT__
2460 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2461 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2462 {
2463   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2464   PetscErrorCode ierr;
2465 
2466   PetscFunctionBegin;
2467   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2468   PetscFunctionReturn(0);
2469 }
2470 
2471 #undef __FUNCT__
2472 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2473 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2474 {
2475   PetscErrorCode ierr;
2476   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2477 
2478   PetscFunctionBegin;
2479   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2480   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2481   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2482   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2483   PetscFunctionReturn(0);
2484 }
2485 
2486 #undef __FUNCT__
2487 #define __FUNCT__ "MatShift_MPIAIJ"
2488 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2489 {
2490   PetscErrorCode ierr;
2491   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2492   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2493 
2494   PetscFunctionBegin;
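       /* If Y has never been preallocated, reserve one nonzero per row for the diagonal; if the
          diagonal block currently holds no nonzeros, preallocate it so that MatShift_Basic() has
          room to insert the diagonal entries. */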
2495   if (!Y->preallocated) {
2496     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2497   } else if (!aij->nz) {
2498     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2499   }
2500   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2501   PetscFunctionReturn(0);
2502 }
2503 
2504 /* -------------------------------------------------------------------*/
2505 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2506                                        MatGetRow_MPIAIJ,
2507                                        MatRestoreRow_MPIAIJ,
2508                                        MatMult_MPIAIJ,
2509                                 /* 4*/ MatMultAdd_MPIAIJ,
2510                                        MatMultTranspose_MPIAIJ,
2511                                        MatMultTransposeAdd_MPIAIJ,
2512                                        0,
2513                                        0,
2514                                        0,
2515                                 /*10*/ 0,
2516                                        0,
2517                                        0,
2518                                        MatSOR_MPIAIJ,
2519                                        MatTranspose_MPIAIJ,
2520                                 /*15*/ MatGetInfo_MPIAIJ,
2521                                        MatEqual_MPIAIJ,
2522                                        MatGetDiagonal_MPIAIJ,
2523                                        MatDiagonalScale_MPIAIJ,
2524                                        MatNorm_MPIAIJ,
2525                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2526                                        MatAssemblyEnd_MPIAIJ,
2527                                        MatSetOption_MPIAIJ,
2528                                        MatZeroEntries_MPIAIJ,
2529                                 /*24*/ MatZeroRows_MPIAIJ,
2530                                        0,
2531                                        0,
2532                                        0,
2533                                        0,
2534                                 /*29*/ MatSetUp_MPIAIJ,
2535                                        0,
2536                                        0,
2537                                        0,
2538                                        0,
2539                                 /*34*/ MatDuplicate_MPIAIJ,
2540                                        0,
2541                                        0,
2542                                        0,
2543                                        0,
2544                                 /*39*/ MatAXPY_MPIAIJ,
2545                                        MatGetSubMatrices_MPIAIJ,
2546                                        MatIncreaseOverlap_MPIAIJ,
2547                                        MatGetValues_MPIAIJ,
2548                                        MatCopy_MPIAIJ,
2549                                 /*44*/ MatGetRowMax_MPIAIJ,
2550                                        MatScale_MPIAIJ,
2551                                        MatShift_MPIAIJ,
2552                                        MatDiagonalSet_MPIAIJ,
2553                                        MatZeroRowsColumns_MPIAIJ,
2554                                 /*49*/ MatSetRandom_MPIAIJ,
2555                                        0,
2556                                        0,
2557                                        0,
2558                                        0,
2559                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2560                                        0,
2561                                        MatSetUnfactored_MPIAIJ,
2562                                        MatPermute_MPIAIJ,
2563                                        0,
2564                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2565                                        MatDestroy_MPIAIJ,
2566                                        MatView_MPIAIJ,
2567                                        0,
2568                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2569                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2570                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2571                                        0,
2572                                        0,
2573                                        0,
2574                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2575                                        MatGetRowMinAbs_MPIAIJ,
2576                                        0,
2577                                        MatSetColoring_MPIAIJ,
2578                                        0,
2579                                        MatSetValuesAdifor_MPIAIJ,
2580                                 /*75*/ MatFDColoringApply_AIJ,
2581                                        0,
2582                                        0,
2583                                        0,
2584                                        MatFindZeroDiagonals_MPIAIJ,
2585                                 /*80*/ 0,
2586                                        0,
2587                                        0,
2588                                 /*83*/ MatLoad_MPIAIJ,
2589                                        0,
2590                                        0,
2591                                        0,
2592                                        0,
2593                                        0,
2594                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2595                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2596                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2597                                        MatPtAP_MPIAIJ_MPIAIJ,
2598                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2599                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2600                                        0,
2601                                        0,
2602                                        0,
2603                                        0,
2604                                 /*99*/ 0,
2605                                        0,
2606                                        0,
2607                                        MatConjugate_MPIAIJ,
2608                                        0,
2609                                 /*104*/MatSetValuesRow_MPIAIJ,
2610                                        MatRealPart_MPIAIJ,
2611                                        MatImaginaryPart_MPIAIJ,
2612                                        0,
2613                                        0,
2614                                 /*109*/0,
2615                                        0,
2616                                        MatGetRowMin_MPIAIJ,
2617                                        0,
2618                                        0,
2619                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2620                                        0,
2621                                        MatGetGhosts_MPIAIJ,
2622                                        0,
2623                                        0,
2624                                 /*119*/0,
2625                                        0,
2626                                        0,
2627                                        0,
2628                                        MatGetMultiProcBlock_MPIAIJ,
2629                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2630                                        MatGetColumnNorms_MPIAIJ,
2631                                        MatInvertBlockDiagonal_MPIAIJ,
2632                                        0,
2633                                        MatGetSubMatricesMPI_MPIAIJ,
2634                                 /*129*/0,
2635                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2636                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2637                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2638                                        0,
2639                                 /*134*/0,
2640                                        0,
2641                                        0,
2642                                        0,
2643                                        0,
2644                                 /*139*/0,
2645                                        0,
2646                                        0,
2647                                        MatFDColoringSetUp_MPIXAIJ,
2648                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2649                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2650 };
2651 
2652 /* ----------------------------------------------------------------------------------------*/
2653 
2654 #undef __FUNCT__
2655 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2656 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2657 {
2658   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2659   PetscErrorCode ierr;
2660 
2661   PetscFunctionBegin;
2662   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2663   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2664   PetscFunctionReturn(0);
2665 }
2666 
2667 #undef __FUNCT__
2668 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2669 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2670 {
2671   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2672   PetscErrorCode ierr;
2673 
2674   PetscFunctionBegin;
2675   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2676   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2677   PetscFunctionReturn(0);
2678 }
2679 
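/*
   Example usage (a minimal sketch, assuming mat is an assembled MATMPIAIJ matrix whose
   nonzero pattern is reused between assemblies): the public MatStoreValues() and
   MatRetrieveValues() entry points, implemented above blockwise for the diagonal (A)
   and off-diagonal (B) parts, stash and later restore the numerical values.

      ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
      ierr = MatStoreValues(mat);CHKERRQ(ierr);
      ierr = MatZeroEntries(mat);CHKERRQ(ierr);            (or otherwise change the values)
      ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/
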
2680 #undef __FUNCT__
2681 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2682 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2683 {
2684   Mat_MPIAIJ     *b;
2685   PetscErrorCode ierr;
2686 
2687   PetscFunctionBegin;
2688   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2689   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2690   b = (Mat_MPIAIJ*)B->data;
2691 
2692   if (!B->preallocated) {
2693     /* Explicitly create 2 MATSEQAIJ matrices. */
2694     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2695     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2696     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2697     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2698     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2699     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2700     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2701     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2702     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2703     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2704   }
2705 
2706   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2707   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2708   B->preallocated = PETSC_TRUE;
2709   PetscFunctionReturn(0);
2710 }
2711 
2712 #undef __FUNCT__
2713 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2714 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2715 {
2716   Mat            mat;
2717   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2718   PetscErrorCode ierr;
2719 
2720   PetscFunctionBegin;
2721   *newmat = 0;
2722   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2723   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2724   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2725   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2726   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2727   a       = (Mat_MPIAIJ*)mat->data;
2728 
2729   mat->factortype   = matin->factortype;
2730   mat->assembled    = PETSC_TRUE;
2731   mat->insertmode   = NOT_SET_VALUES;
2732   mat->preallocated = PETSC_TRUE;
2733 
2734   a->size         = oldmat->size;
2735   a->rank         = oldmat->rank;
2736   a->donotstash   = oldmat->donotstash;
2737   a->roworiented  = oldmat->roworiented;
2738   a->rowindices   = 0;
2739   a->rowvalues    = 0;
2740   a->getrowactive = PETSC_FALSE;
2741 
2742   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2743   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2744 
2745   if (oldmat->colmap) {
2746 #if defined(PETSC_USE_CTABLE)
2747     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2748 #else
2749     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2750     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2751     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2752 #endif
2753   } else a->colmap = 0;
2754   if (oldmat->garray) {
2755     PetscInt len;
2756     len  = oldmat->B->cmap->n;
2757     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2758     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2759     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2760   } else a->garray = 0;
2761 
2762   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2763   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2764   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2765   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2766   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2767   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2768   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2769   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2770   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2771   *newmat = mat;
2772   PetscFunctionReturn(0);
2773 }
2774 
2775 
2776 
2777 #undef __FUNCT__
2778 #define __FUNCT__ "MatLoad_MPIAIJ"
2779 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2780 {
2781   PetscScalar    *vals,*svals;
2782   MPI_Comm       comm;
2783   PetscErrorCode ierr;
2784   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2785   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2786   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2787   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2788   PetscInt       cend,cstart,n,*rowners;
2789   int            fd;
2790   PetscInt       bs = newMat->rmap->bs;
2791 
2792   PetscFunctionBegin;
2793   /* force binary viewer to load .info file if it has not yet done so */
2794   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2795   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2796   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2797   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2798   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2799   if (!rank) {
2800     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2801     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2802   }
2803 
2804   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
2805   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2806   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2807   if (bs < 0) bs = 1;
2808 
2809   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2810   M    = header[1]; N = header[2];
2811 
2812   /* If global sizes are set, check if they are consistent with that given in the file */
2813   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2814   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2815 
2816   /* determine ownership of all (block) rows */
2817   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2818   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2819   else m = newMat->rmap->n; /* Set by user */
2820 
2821   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2822   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2823 
2824   /* First process needs enough room for process with most rows */
2825   if (!rank) {
2826     mmax = rowners[1];
2827     for (i=2; i<=size; i++) {
2828       mmax = PetscMax(mmax, rowners[i]);
2829     }
2830   } else mmax = -1;             /* unused, but compilers complain */
2831 
2832   rowners[0] = 0;
2833   for (i=2; i<=size; i++) {
2834     rowners[i] += rowners[i-1];
2835   }
2836   rstart = rowners[rank];
2837   rend   = rowners[rank+1];
2838 
2839   /* distribute row lengths to all processors */
2840   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2841   if (!rank) {
2842     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2843     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2844     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2845     for (j=0; j<m; j++) {
2846       procsnz[0] += ourlens[j];
2847     }
2848     for (i=1; i<size; i++) {
2849       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2850       /* calculate the number of nonzeros on each processor */
2851       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2852         procsnz[i] += rowlengths[j];
2853       }
2854       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2855     }
2856     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2857   } else {
2858     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2859   }
2860 
2861   if (!rank) {
2862     /* determine max buffer needed and allocate it */
2863     maxnz = 0;
2864     for (i=0; i<size; i++) {
2865       maxnz = PetscMax(maxnz,procsnz[i]);
2866     }
2867     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2868 
2869     /* read in my part of the matrix column indices  */
2870     nz   = procsnz[0];
2871     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2872     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2873 
2874     /* read in everyone else's rows and ship them off */
2875     for (i=1; i<size; i++) {
2876       nz   = procsnz[i];
2877       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2878       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2879     }
2880     ierr = PetscFree(cols);CHKERRQ(ierr);
2881   } else {
2882     /* determine buffer space needed for message */
2883     nz = 0;
2884     for (i=0; i<m; i++) {
2885       nz += ourlens[i];
2886     }
2887     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2888 
2889     /* receive message of column indices*/
2890     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2891   }
2892 
2893   /* determine column ownership if matrix is not square */
2894   if (N != M) {
2895     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2896     else n = newMat->cmap->n;
2897     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2898     cstart = cend - n;
2899   } else {
2900     cstart = rstart;
2901     cend   = rend;
2902     n      = cend - cstart;
2903   }
2904 
2905   /* loop over local rows, determining number of off diagonal entries */
2906   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2907   jj   = 0;
2908   for (i=0; i<m; i++) {
2909     for (j=0; j<ourlens[i]; j++) {
2910       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2911       jj++;
2912     }
2913   }
2914 
2915   for (i=0; i<m; i++) {
2916     ourlens[i] -= offlens[i];
2917   }
2918   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2919 
2920   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2921 
2922   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2923 
2924   for (i=0; i<m; i++) {
2925     ourlens[i] += offlens[i];
2926   }
2927 
2928   if (!rank) {
2929     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2930 
2931     /* read in my part of the matrix numerical values  */
2932     nz   = procsnz[0];
2933     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2934 
2935     /* insert into matrix */
2936     jj      = rstart;
2937     smycols = mycols;
2938     svals   = vals;
2939     for (i=0; i<m; i++) {
2940       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2941       smycols += ourlens[i];
2942       svals   += ourlens[i];
2943       jj++;
2944     }
2945 
2946     /* read in other processors and ship out */
2947     for (i=1; i<size; i++) {
2948       nz   = procsnz[i];
2949       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2950       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2951     }
2952     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2953   } else {
2954     /* receive numeric values */
2955     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2956 
2957     /* receive message of values*/
2958     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2959 
2960     /* insert into matrix */
2961     jj      = rstart;
2962     smycols = mycols;
2963     svals   = vals;
2964     for (i=0; i<m; i++) {
2965       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2966       smycols += ourlens[i];
2967       svals   += ourlens[i];
2968       jj++;
2969     }
2970   }
2971   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2972   ierr = PetscFree(vals);CHKERRQ(ierr);
2973   ierr = PetscFree(mycols);CHKERRQ(ierr);
2974   ierr = PetscFree(rowners);CHKERRQ(ierr);
2975   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2976   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2977   PetscFunctionReturn(0);
2978 }
2979 
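/*
   Example usage (a minimal sketch; the file name is only illustrative): MatLoad_MPIAIJ()
   is reached through the public MatLoad() once the matrix type has been set, e.g.

      Mat         A;
      PetscViewer viewer;

      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatLoad(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   The block size used for the loaded matrix can be adjusted with -matload_block_size.
*/
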
2980 #undef __FUNCT__
2981 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2982 /* TODO: Not scalable because of ISAllGather(). */
2983 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2984 {
2985   PetscErrorCode ierr;
2986   IS             iscol_local;
2987   PetscInt       csize;
2988 
2989   PetscFunctionBegin;
2990   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2991   if (call == MAT_REUSE_MATRIX) {
2992     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2993     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2994   } else {
2995     PetscInt cbs;
2996     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
2997     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2998     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
2999   }
3000   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3001   if (call == MAT_INITIAL_MATRIX) {
3002     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3003     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3004   }
3005   PetscFunctionReturn(0);
3006 }
3007 
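/*
   Example usage (a minimal sketch, assuming isrow and iscol are index sets with the
   desired global rows and columns): the reuse path above relies on the "ISAllGather"
   object composed with the submatrix during the MAT_INITIAL_MATRIX call, so the same
   submatrix must be passed back in when MAT_REUSE_MATRIX is requested.

      Mat sub;

      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
      .... change values in A but not its nonzero structure ....
      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/
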
3008 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3009 #undef __FUNCT__
3010 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3011 /*
3012     Not great since it makes two copies of the submatrix: first a SeqAIJ
3013   copy of the local part, and then the end result obtained by concatenating the local matrices.
3014   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3015 
3016   Note: This requires a sequential iscol with all indices.
3017 */
3018 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3019 {
3020   PetscErrorCode ierr;
3021   PetscMPIInt    rank,size;
3022   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3023   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3024   PetscBool      allcolumns, colflag;
3025   Mat            M,Mreuse;
3026   MatScalar      *vwork,*aa;
3027   MPI_Comm       comm;
3028   Mat_SeqAIJ     *aij;
3029 
3030   PetscFunctionBegin;
3031   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3032   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3033   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3034 
3035   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3036   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3037   if (colflag && ncol == mat->cmap->N) {
3038     allcolumns = PETSC_TRUE;
3039   } else {
3040     allcolumns = PETSC_FALSE;
3041   }
3042   if (call ==  MAT_REUSE_MATRIX) {
3043     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3044     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3045     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3046   } else {
3047     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3048   }
3049 
3050   /*
3051       m - number of local rows
3052       n - number of columns (same on all processors)
3053       rstart - first row in new global matrix generated
3054   */
3055   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3056   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3057   if (call == MAT_INITIAL_MATRIX) {
3058     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3059     ii  = aij->i;
3060     jj  = aij->j;
3061 
3062     /*
3063         Determine the number of non-zeros in the diagonal and off-diagonal
3064         portions of the matrix in order to do correct preallocation
3065     */
3066 
3067     /* first get start and end of "diagonal" columns */
3068     if (csize == PETSC_DECIDE) {
3069       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3070       if (mglobal == n) { /* square matrix */
3071         nlocal = m;
3072       } else {
3073         nlocal = n/size + ((n % size) > rank);
3074       }
3075     } else {
3076       nlocal = csize;
3077     }
3078     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3079     rstart = rend - nlocal;
3080     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3081 
3082     /* next, compute all the lengths */
3083     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3084     olens = dlens + m;
3085     for (i=0; i<m; i++) {
3086       jend = ii[i+1] - ii[i];
3087       olen = 0;
3088       dlen = 0;
3089       for (j=0; j<jend; j++) {
3090         if (*jj < rstart || *jj >= rend) olen++;
3091         else dlen++;
3092         jj++;
3093       }
3094       olens[i] = olen;
3095       dlens[i] = dlen;
3096     }
3097     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3098     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3099     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3100     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3101     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3102     ierr = PetscFree(dlens);CHKERRQ(ierr);
3103   } else {
3104     PetscInt ml,nl;
3105 
3106     M    = *newmat;
3107     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3108     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3109     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3110     /*
3111          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3112        rather than the slower MatSetValues().
3113     */
3114     M->was_assembled = PETSC_TRUE;
3115     M->assembled     = PETSC_FALSE;
3116   }
3117   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3118   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3119   ii   = aij->i;
3120   jj   = aij->j;
3121   aa   = aij->a;
3122   for (i=0; i<m; i++) {
3123     row   = rstart + i;
3124     nz    = ii[i+1] - ii[i];
3125     cwork = jj;     jj += nz;
3126     vwork = aa;     aa += nz;
3127     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3128   }
3129 
3130   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3131   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3132   *newmat = M;
3133 
3134   /* save submatrix used in processor for next request */
3135   if (call ==  MAT_INITIAL_MATRIX) {
3136     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3137     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3138   }
3139   PetscFunctionReturn(0);
3140 }
3141 
3142 #undef __FUNCT__
3143 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3144 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3145 {
3146   PetscInt       m,cstart, cend,j,nnz,i,d;
3147   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3148   const PetscInt *JJ;
3149   PetscScalar    *values;
3150   PetscErrorCode ierr;
3151 
3152   PetscFunctionBegin;
3153   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3154 
3155   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3156   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3157   m      = B->rmap->n;
3158   cstart = B->cmap->rstart;
3159   cend   = B->cmap->rend;
3160   rstart = B->rmap->rstart;
3161 
3162   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3163 
3164 #if defined(PETSC_USE_DEBUG)
3165   for (i=0; i<m; i++) {
3166     nnz = Ii[i+1] - Ii[i];
3167     JJ  = J + Ii[i];
3168     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3169     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3170     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3171   }
3172 #endif
3173 
3174   for (i=0; i<m; i++) {
3175     nnz     = Ii[i+1]- Ii[i];
3176     JJ      = J + Ii[i];
3177     nnz_max = PetscMax(nnz_max,nnz);
3178     d       = 0;
3179     for (j=0; j<nnz; j++) {
3180       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3181     }
3182     d_nnz[i] = d;
3183     o_nnz[i] = nnz - d;
3184   }
3185   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3186   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3187 
3188   if (v) values = (PetscScalar*)v;
3189   else {
3190     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3191   }
3192 
3193   for (i=0; i<m; i++) {
3194     ii   = i + rstart;
3195     nnz  = Ii[i+1]- Ii[i];
3196     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3197   }
3198   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3199   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3200 
3201   if (!v) {
3202     ierr = PetscFree(values);CHKERRQ(ierr);
3203   }
3204   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3205   PetscFunctionReturn(0);
3206 }
3207 
3208 #undef __FUNCT__
3209 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3210 /*@
3211    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3212    (the default parallel PETSc format).
3213 
3214    Collective on MPI_Comm
3215 
3216    Input Parameters:
3217 +  B - the matrix
3218 .  i - the indices into j for the start of each local row (starts with zero)
3219 .  j - the column indices for each local row (starts with zero)
3220 -  v - optional values in the matrix
3221 
3222    Level: developer
3223 
3224    Notes:
3225        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3226      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3227      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3228 
3229        The i and j indices are 0 based, and the i indices are offsets into the local j (and v) arrays.
3230 
3231        The format used for the sparse matrix input is equivalent to a
3232     row-major ordering, i.e. for the following matrix, the input data expected is
3233     as shown:
3234 
3235 $        1 0 0
3236 $        2 0 3     P0
3237 $       -------
3238 $        4 5 6     P1
3239 $
3240 $     Process0 [P0]: rows_owned=[0,1]
3241 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3242 $        j =  {0,0,2}  [size = 3]
3243 $        v =  {1,2,3}  [size = 3]
3244 $
3245 $     Process1 [P1]: rows_owned=[2]
3246 $        i =  {0,3}    [size = nrow+1  = 1+1]
3247 $        j =  {0,1,2}  [size = 3]
3248 $        v =  {4,5,6}  [size = 3]
3249 
3250 .keywords: matrix, aij, compressed row, sparse, parallel
3251 
3252 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3253           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3254 @*/
3255 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3256 {
3257   PetscErrorCode ierr;
3258 
3259   PetscFunctionBegin;
3260   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3261   PetscFunctionReturn(0);
3262 }
3263 
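/*
   Example usage (a minimal sketch): the call made on process 0 for the 3x3 two-process
   example in the manual page above; process 1 makes the same collective calls with its
   own local row count (1) and its own i,j,v arrays.  The routine preallocates, inserts
   the values, and assembles the matrix, so no further assembly calls are needed.

      Mat               B;
      const PetscInt    i[] = {0,1,3};
      const PetscInt    j[] = {0,0,2};
      const PetscScalar v[] = {1,2,3};

      ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
      ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
*/
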
3264 #undef __FUNCT__
3265 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3266 /*@C
3267    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3268    (the default parallel PETSc format).  For good matrix assembly performance
3269    the user should preallocate the matrix storage by setting the parameters
3270    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3271    performance can be increased by more than a factor of 50.
3272 
3273    Collective on MPI_Comm
3274 
3275    Input Parameters:
3276 +  B - the matrix
3277 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3278            (same value is used for all local rows)
3279 .  d_nnz - array containing the number of nonzeros in the various rows of the
3280            DIAGONAL portion of the local submatrix (possibly different for each row)
3281            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3282            The size of this array is equal to the number of local rows, i.e 'm'.
3283            For matrices that will be factored, you must leave room for (and set)
3284            the diagonal entry even if it is zero.
3285 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3286            submatrix (same value is used for all local rows).
3287 -  o_nnz - array containing the number of nonzeros in the various rows of the
3288            OFF-DIAGONAL portion of the local submatrix (possibly different for
3289            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3290            structure. The size of this array is equal to the number
3291            of local rows, i.e 'm'.
3292 
3293    If the *_nnz parameter is given then the *_nz parameter is ignored
3294 
3295    The AIJ format (also called the Yale sparse matrix format or
3296    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3297    storage.  The stored row and column indices begin with zero.
3298    See Users-Manual: ch_mat for details.
3299 
3300    The parallel matrix is partitioned such that the first m0 rows belong to
3301    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3302    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3303 
3304    The DIAGONAL portion of the local submatrix of a processor can be defined
3305    as the submatrix which is obtained by extracting the part corresponding to
3306    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3307    first row that belongs to the processor, r2 is the last row belonging to
3308    this processor, and c1-c2 is the range of indices of the local part of a
3309    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3310    common case of a square matrix, the row and column ranges are the same and
3311    the DIAGONAL part is also square. The remaining portion of the local
3312    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3313 
3314    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3315 
3316    You can call MatGetInfo() to get information on how effective the preallocation was;
3317    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3318    You can also run with the option -info and look for messages with the string
3319    malloc in them to see if additional memory allocation was needed.
3320 
3321    Example usage:
3322 
3323    Consider the following 8x8 matrix with 34 non-zero values, that is
3324    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3325    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3326    as follows:
3327 
3328 .vb
3329             1  2  0  |  0  3  0  |  0  4
3330     Proc0   0  5  6  |  7  0  0  |  8  0
3331             9  0 10  | 11  0  0  | 12  0
3332     -------------------------------------
3333            13  0 14  | 15 16 17  |  0  0
3334     Proc1   0 18  0  | 19 20 21  |  0  0
3335             0  0  0  | 22 23  0  | 24  0
3336     -------------------------------------
3337     Proc2  25 26 27  |  0  0 28  | 29  0
3338            30  0  0  | 31 32 33  |  0 34
3339 .ve
3340 
3341    This can be represented as a collection of submatrices as:
3342 
3343 .vb
3344       A B C
3345       D E F
3346       G H I
3347 .ve
3348 
3349    Where the submatrices A,B,C are owned by proc0, D,E,F are
3350    owned by proc1, G,H,I are owned by proc2.
3351 
3352    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3353    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3354    The 'M','N' parameters are 8,8, and have the same values on all procs.
3355 
3356    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3357    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3358    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3359    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3360    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3361    matrix, and [DF] as another SeqAIJ matrix.
3362 
3363    When d_nz, o_nz parameters are specified, d_nz storage elements are
3364    allocated for every row of the local diagonal submatrix, and o_nz
3365    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3366    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3367    local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3368    In this case, the values of d_nz,o_nz are:
3369 .vb
3370      proc0 : d_nz = 2, o_nz = 2
3371      proc1 : d_nz = 3, o_nz = 2
3372      proc2 : d_nz = 1, o_nz = 4
3373 .ve
3374    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3375    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3376    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3377    34 values.
3378 
3379    When d_nnz, o_nnz parameters are specified, the storage is specified
3380    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3381    In the above case the values for d_nnz,o_nnz are:
3382 .vb
3383      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3384      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3385      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3386 .ve
3387    Here the space allocated is the sum of all the above values, i.e. 34, and
3388    hence the preallocation is perfect.
3389 
3390    Level: intermediate
3391 
3392 .keywords: matrix, aij, compressed row, sparse, parallel
3393 
3394 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3395           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3396 @*/
3397 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3398 {
3399   PetscErrorCode ierr;
3400 
3401   PetscFunctionBegin;
3402   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3403   PetscValidType(B,1);
3404   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3405   PetscFunctionReturn(0);
3406 }
3407 
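/*
   Example usage (a minimal sketch): the calls made on process 0 for the 8x8 example in
   the manual page above; each process passes its own local sizes and its own d_nnz/o_nnz
   arrays (here those of proc0), and the values are then inserted with MatSetValues()
   followed by MatAssemblyBegin()/MatAssemblyEnd().

      Mat      A;
      PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/
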
3408 #undef __FUNCT__
3409 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3410 /*@
3411      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3412          CSR format the local rows.
3413 
3414    Collective on MPI_Comm
3415 
3416    Input Parameters:
3417 +  comm - MPI communicator
3418 .  m - number of local rows (Cannot be PETSC_DECIDE)
3419 .  n - This value should be the same as the local size used in creating the
3420        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3421        calculated if N is given) For square matrices n is almost always m.
3422 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3423 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3424 .   i - row indices
3425 .   j - column indices
3426 -   a - matrix values
3427 
3428    Output Parameter:
3429 .   mat - the matrix
3430 
3431    Level: intermediate
3432 
3433    Notes:
3434        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3435      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3436      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3437 
3438        The i and j indices are 0 based, and the i indices are offsets into the local j (and a) arrays.
3439 
3440        The format used for the sparse matrix input is equivalent to a
3441     row-major ordering, i.e. for the following matrix, the input data expected is
3442     as shown:
3443 
3444 $        1 0 0
3445 $        2 0 3     P0
3446 $       -------
3447 $        4 5 6     P1
3448 $
3449 $     Process0 [P0]: rows_owned=[0,1]
3450 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3451 $        j =  {0,0,2}  [size = 3]
3452 $        v =  {1,2,3}  [size = 3]
3453 $
3454 $     Process1 [P1]: rows_owned=[2]
3455 $        i =  {0,3}    [size = nrow+1  = 1+1]
3456 $        j =  {0,1,2}  [size = 3]
3457 $        v =  {4,5,6}  [size = 3]
3458 
3459 .keywords: matrix, aij, compressed row, sparse, parallel
3460 
3461 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3462           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3463 @*/
3464 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3465 {
3466   PetscErrorCode ierr;
3467 
3468   PetscFunctionBegin;
3469   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3470   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3471   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3472   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3473   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3474   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3475   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3476   PetscFunctionReturn(0);
3477 }
3478 
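/*
   Example usage (a minimal sketch): the call made on process 0 for the 3x3 two-process
   example in the manual page above; process 1 calls the routine with its own local row
   count (1) and its own i,j,a arrays.

      Mat               A;
      const PetscInt    i[] = {0,1,3},j[] = {0,0,2};
      const PetscScalar a[] = {1,2,3};

      ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,a,&A);CHKERRQ(ierr);
*/
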
3479 #undef __FUNCT__
3480 #define __FUNCT__ "MatCreateAIJ"
3481 /*@C
3482    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3483    (the default parallel PETSc format).  For good matrix assembly performance
3484    the user should preallocate the matrix storage by setting the parameters
3485    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3486    performance can be increased by more than a factor of 50.
3487 
3488    Collective on MPI_Comm
3489 
3490    Input Parameters:
3491 +  comm - MPI communicator
3492 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3493            This value should be the same as the local size used in creating the
3494            y vector for the matrix-vector product y = Ax.
3495 .  n - This value should be the same as the local size used in creating the
3496        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3497        calculated if N is given) For square matrices n is almost always m.
3498 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3499 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3500 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3501            (same value is used for all local rows)
3502 .  d_nnz - array containing the number of nonzeros in the various rows of the
3503            DIAGONAL portion of the local submatrix (possibly different for each row)
3504            or NULL, if d_nz is used to specify the nonzero structure.
3505            The size of this array is equal to the number of local rows, i.e 'm'.
3506 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3507            submatrix (same value is used for all local rows).
3508 -  o_nnz - array containing the number of nonzeros in the various rows of the
3509            OFF-DIAGONAL portion of the local submatrix (possibly different for
3510            each row) or NULL, if o_nz is used to specify the nonzero
3511            structure. The size of this array is equal to the number
3512            of local rows, i.e 'm'.
3513 
3514    Output Parameter:
3515 .  A - the matrix
3516 
3517    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3518    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3519    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3520 
3521    Notes:
3522    If the *_nnz parameter is given then the *_nz parameter is ignored
3523 
3524    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3525    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3526    storage requirements for this matrix.
3527 
3528    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3529    processor then it must be used on all processors that share the object for
3530    that argument.
3531 
3532    The user MUST specify either the local or global matrix dimensions
3533    (possibly both).
3534 
3535    The parallel matrix is partitioned across processors such that the
3536    first m0 rows belong to process 0, the next m1 rows belong to
3537    process 1, the next m2 rows belong to process 2 etc.. where
3538    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3539    values corresponding to [m x N] submatrix.
3540 
3541    The columns are logically partitioned with the n0 columns belonging
3542    to 0th partition, the next n1 columns belonging to the next
3543    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3544 
3545    The DIAGONAL portion of the local submatrix on any given processor
3546    is the submatrix corresponding to the rows and columns m,n
3547    owned by the given processor, i.e. the diagonal matrix on
3548    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3549    etc. The remaining portion of the local submatrix [m x (N-n)]
3550    constitute the OFF-DIAGONAL portion. The example below better
3551    illustrates this concept.
3552 
3553    For a square global matrix we define each processor's diagonal portion
3554    to be its local rows and the corresponding columns (a square submatrix);
3555    each processor's off-diagonal portion encompasses the remainder of the
3556    local matrix (a rectangular submatrix).
3557 
3558    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3559 
3560    When calling this routine with a single process communicator, a matrix of
3561    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3562    type of communicator, use the construction mechanism:
3563      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3564 
3565    By default, this format uses inodes (identical nodes) when possible.
3566    We search for consecutive rows with the same nonzero structure, thereby
3567    reusing matrix information to achieve increased efficiency.
3568 
3569    Options Database Keys:
3570 +  -mat_no_inode  - Do not use inodes
3571 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3572 -  -mat_aij_oneindex - Internally use indexing starting at 1
3573         rather than 0.  Note that when calling MatSetValues(),
3574         the user still MUST index entries starting at 0!
3575 
3576 
3577    Example usage:
3578 
3579    Consider the following 8x8 matrix with 34 non-zero values, that is
3580    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3581    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3582    as follows:
3583 
3584 .vb
3585             1  2  0  |  0  3  0  |  0  4
3586     Proc0   0  5  6  |  7  0  0  |  8  0
3587             9  0 10  | 11  0  0  | 12  0
3588     -------------------------------------
3589            13  0 14  | 15 16 17  |  0  0
3590     Proc1   0 18  0  | 19 20 21  |  0  0
3591             0  0  0  | 22 23  0  | 24  0
3592     -------------------------------------
3593     Proc2  25 26 27  |  0  0 28  | 29  0
3594            30  0  0  | 31 32 33  |  0 34
3595 .ve
3596 
3597    This can be represented as a collection of submatrices as:
3598 
3599 .vb
3600       A B C
3601       D E F
3602       G H I
3603 .ve
3604 
3605    Where the submatrices A,B,C are owned by proc0, D,E,F are
3606    owned by proc1, G,H,I are owned by proc2.
3607 
3608    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3609    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3610    The 'M','N' parameters are 8,8, and have the same values on all procs.
3611 
3612    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3613    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3614    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3615    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3616    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3617    matrix, and [DF] as another SeqAIJ matrix.
3618 
3619    When d_nz, o_nz parameters are specified, d_nz storage elements are
3620    allocated for every row of the local diagonal submatrix, and o_nz
3621    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3622    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3623    local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3624    In this case, the values of d_nz,o_nz are:
3625 .vb
3626      proc0 : d_nz = 2, o_nz = 2
3627      proc1 : d_nz = 3, o_nz = 2
3628      proc2 : d_nz = 1, o_nz = 4
3629 .ve
3630    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3631    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3632    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3633    34 values.
3634 
3635    When d_nnz, o_nnz parameters are specified, the storage is specified
3636    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3637    In the above case the values for d_nnz,o_nnz are:
3638 .vb
3639      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3640      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3641      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3642 .ve
3643    Here the space allocated is the sum of all the above values, i.e. 34, and
3644    hence the preallocation is perfect.
3645 
3646    Level: intermediate
3647 
3648 .keywords: matrix, aij, compressed row, sparse, parallel
3649 
3650 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3651           MPIAIJ, MatCreateMPIAIJWithArrays()
3652 @*/
3653 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3654 {
3655   PetscErrorCode ierr;
3656   PetscMPIInt    size;
3657 
3658   PetscFunctionBegin;
3659   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3660   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3661   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3662   if (size > 1) {
3663     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3664     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3665   } else {
3666     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3667     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3668   }
3669   PetscFunctionReturn(0);
3670 }
3671 
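/*
   Example usage (a minimal sketch): creating the 8x8 example matrix from the manual page
   above with the scalar d_nz/o_nz form of preallocation; each process passes its own
   local sizes and its own d_nz,o_nz (here those of proc0), then fills the matrix with
   MatSetValues() followed by MatAssemblyBegin()/MatAssemblyEnd().

      Mat A;

      ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,2,NULL,2,NULL,&A);CHKERRQ(ierr);
*/
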
3672 #undef __FUNCT__
3673 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3674 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3675 {
3676   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3677 
3678   PetscFunctionBegin;
3679   if (Ad)     *Ad     = a->A;
3680   if (Ao)     *Ao     = a->B;
3681   if (colmap) *colmap = a->garray;
3682   PetscFunctionReturn(0);
3683 }
3684 
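/*
   Example usage (a minimal sketch, assuming A is an assembled MATMPIAIJ matrix): access
   the local diagonal block Ad, the local off-diagonal block Ao, and the map from local
   columns of Ao to global column numbers.  The returned matrices are owned by A and must
   not be destroyed by the caller.

      Mat            Ad,Ao;
      const PetscInt *colmap;

      ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/
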
3685 #undef __FUNCT__
3686 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3687 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3688 {
3689   PetscErrorCode ierr;
3690   PetscInt       i;
3691   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3692 
3693   PetscFunctionBegin;
3694   if (coloring->ctype == IS_COLORING_GLOBAL) {
3695     ISColoringValue *allcolors,*colors;
3696     ISColoring      ocoloring;
3697 
3698     /* set coloring for diagonal portion */
3699     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3700 
3701     /* set coloring for off-diagonal portion */
3702     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
3703     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3704     for (i=0; i<a->B->cmap->n; i++) {
3705       colors[i] = allcolors[a->garray[i]];
3706     }
3707     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3708     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3709     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3710     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3711   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3712     ISColoringValue *colors;
3713     PetscInt        *larray;
3714     ISColoring      ocoloring;
3715 
3716     /* set coloring for diagonal portion */
3717     ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr);
3718     for (i=0; i<a->A->cmap->n; i++) {
3719       larray[i] = i + A->cmap->rstart;
3720     }
3721     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
3722     ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr);
3723     for (i=0; i<a->A->cmap->n; i++) {
3724       colors[i] = coloring->colors[larray[i]];
3725     }
3726     ierr = PetscFree(larray);CHKERRQ(ierr);
3727     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3728     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3729     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3730 
3731     /* set coloring for off-diagonal portion */
3732     ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr);
3733     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
3734     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3735     for (i=0; i<a->B->cmap->n; i++) {
3736       colors[i] = coloring->colors[larray[i]];
3737     }
3738     ierr = PetscFree(larray);CHKERRQ(ierr);
3739     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3740     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3741     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3742   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3743   PetscFunctionReturn(0);
3744 }
3745 
3746 #undef __FUNCT__
3747 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3748 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3749 {
3750   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3751   PetscErrorCode ierr;
3752 
3753   PetscFunctionBegin;
3754   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3755   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3756   PetscFunctionReturn(0);
3757 }
3758 
3759 #undef __FUNCT__
3760 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
3761 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3762 {
3763   PetscErrorCode ierr;
3764   PetscInt       m,N,i,rstart,nnz,Ii;
3765   PetscInt       *indx;
3766   PetscScalar    *values;
3767 
3768   PetscFunctionBegin;
3769   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3770   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3771     PetscInt       *dnz,*onz,sum,bs,cbs;
3772 
3773     if (n == PETSC_DECIDE) {
3774       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3775     }
3776     /* Check sum(n) = N */
3777     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3778     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",N);
3779 
3780     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3781     rstart -= m;
3782 
3783     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3784     for (i=0; i<m; i++) {
3785       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3786       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3787       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3788     }
3789 
3790     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3791     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3792     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3793     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3794     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3795     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3796     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3797   }
3798 
3799   /* numeric phase */
3800   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3801   for (i=0; i<m; i++) {
3802     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3803     Ii   = i + rstart;
3804     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3805     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3806   }
3807   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3808   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3809   PetscFunctionReturn(0);
3810 }
3811 
3812 #undef __FUNCT__
3813 #define __FUNCT__ "MatFileSplit"
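/*
   Copies the locally owned rows of the parallel matrix A into a temporary sequential matrix
   and appends it, in PETSc binary format, to a per-process file named "<outfile>.<rank>".
*/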
3814 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3815 {
3816   PetscErrorCode    ierr;
3817   PetscMPIInt       rank;
3818   PetscInt          m,N,i,rstart,nnz;
3819   size_t            len;
3820   const PetscInt    *indx;
3821   PetscViewer       out;
3822   char              *name;
3823   Mat               B;
3824   const PetscScalar *values;
3825 
3826   PetscFunctionBegin;
3827   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3828   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3829   /* Should this be the type of the diagonal block of A? */
3830   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3831   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3832   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3833   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3834   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3835   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3836   for (i=0; i<m; i++) {
3837     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3838     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3839     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3840   }
3841   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3842   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3843 
3844   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3845   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3846   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
3847   sprintf(name,"%s.%d",outfile,rank);
3848   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3849   ierr = PetscFree(name);CHKERRQ(ierr);
3850   ierr = MatView(B,out);CHKERRQ(ierr);
3851   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3852   ierr = MatDestroy(&B);CHKERRQ(ierr);
3853   PetscFunctionReturn(0);
3854 }
3855 
3856 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3857 #undef __FUNCT__
3858 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3859 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3860 {
3861   PetscErrorCode      ierr;
3862   Mat_Merge_SeqsToMPI *merge;
3863   PetscContainer      container;
3864 
3865   PetscFunctionBegin;
3866   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3867   if (container) {
3868     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3869     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3870     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3871     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3872     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3873     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3874     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3875     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3876     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3877     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3878     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3879     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3880     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3881     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3882     ierr = PetscFree(merge);CHKERRQ(ierr);
3883     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3884   }
3885   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3886   PetscFunctionReturn(0);
3887 }
3888 
3889 #include <../src/mat/utils/freespace.h>
3890 #include <petscbt.h>
3891 
3892 #undef __FUNCT__
3893 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
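/*
   Numeric phase of MatCreateMPIAIJSumSeqAIJ(): using the communication pattern stored in the
   "MatMergeSeqsToMPI" container by the symbolic phase, each process sends the values of the
   seqmat rows owned by other processes and then accumulates its local and received values
   into the rows of mpimat that it owns.
*/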
3894 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
3895 {
3896   PetscErrorCode      ierr;
3897   MPI_Comm            comm;
3898   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
3899   PetscMPIInt         size,rank,taga,*len_s;
3900   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
3901   PetscInt            proc,m;
3902   PetscInt            **buf_ri,**buf_rj;
3903   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3904   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
3905   MPI_Request         *s_waits,*r_waits;
3906   MPI_Status          *status;
3907   MatScalar           *aa=a->a;
3908   MatScalar           **abuf_r,*ba_i;
3909   Mat_Merge_SeqsToMPI *merge;
3910   PetscContainer      container;
3911 
3912   PetscFunctionBegin;
3913   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
3914   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3915 
3916   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3917   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3918 
3919   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3920   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3921 
3922   bi     = merge->bi;
3923   bj     = merge->bj;
3924   buf_ri = merge->buf_ri;
3925   buf_rj = merge->buf_rj;
3926 
3927   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
3928   owners = merge->rowmap->range;
3929   len_s  = merge->len_s;
3930 
3931   /* send and recv matrix values */
3932   /*-----------------------------*/
3933   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3934   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3935 
3936   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
3937   for (proc=0,k=0; proc<size; proc++) {
3938     if (!len_s[proc]) continue;
3939     i    = owners[proc];
3940     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3941     k++;
3942   }
3943 
3944   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3945   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3946   ierr = PetscFree(status);CHKERRQ(ierr);
3947 
3948   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3949   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3950 
3951   /* insert mat values of mpimat */
3952   /*----------------------------*/
3953   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
3954   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
3955 
3956   for (k=0; k<merge->nrecv; k++) {
3957     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3958     nrows       = *(buf_ri_k[k]);
3959     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
3960     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
3961   }
3962 
3963   /* set values of ba */
3964   m = merge->rowmap->n;
3965   for (i=0; i<m; i++) {
3966     arow = owners[rank] + i;
3967     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3968     bnzi = bi[i+1] - bi[i];
3969     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
3970 
3971     /* add local non-zero vals of this proc's seqmat into ba */
3972     anzi   = ai[arow+1] - ai[arow];
3973     aj     = a->j + ai[arow];
3974     aa     = a->a + ai[arow];
3975     nextaj = 0;
3976     for (j=0; nextaj<anzi; j++) {
3977       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
3978         ba_i[j] += aa[nextaj++];
3979       }
3980     }
3981 
3982     /* add received vals into ba */
3983     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
3984       /* i-th row */
3985       if (i == *nextrow[k]) {
3986         anzi   = *(nextai[k]+1) - *nextai[k];
3987         aj     = buf_rj[k] + *(nextai[k]);
3988         aa     = abuf_r[k] + *(nextai[k]);
3989         nextaj = 0;
3990         for (j=0; nextaj<anzi; j++) {
3991           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
3992             ba_i[j] += aa[nextaj++];
3993           }
3994         }
3995         nextrow[k]++; nextai[k]++;
3996       }
3997     }
3998     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3999   }
4000   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4001   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4002 
4003   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4004   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4005   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4006   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4007   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4008   PetscFunctionReturn(0);
4009 }
4010 
4011 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4012 
4013 #undef __FUNCT__
4014 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
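/*
   Symbolic phase of MatCreateMPIAIJSumSeqAIJ(): determines the row ownership, exchanges the
   ij-structure of the rows each process contributes to other owners, merges the local and
   received column indices with a linked list to preallocate the parallel matrix, and attaches
   a Mat_Merge_SeqsToMPI container to the result so the numeric phase can reuse the pattern.
*/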
4015 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4016 {
4017   PetscErrorCode      ierr;
4018   Mat                 B_mpi;
4019   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4020   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4021   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4022   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4023   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4024   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4025   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4026   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4027   MPI_Status          *status;
4028   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4029   PetscBT             lnkbt;
4030   Mat_Merge_SeqsToMPI *merge;
4031   PetscContainer      container;
4032 
4033   PetscFunctionBegin;
4034   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4035 
4036   /* make sure it is a PETSc comm */
4037   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4038   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4039   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4040 
4041   ierr = PetscNew(&merge);CHKERRQ(ierr);
4042   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4043 
4044   /* determine row ownership */
4045   /*---------------------------------------------------------*/
4046   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4047   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4048   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4049   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4050   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4051   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4052   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4053 
4054   m      = merge->rowmap->n;
4055   owners = merge->rowmap->range;
4056 
4057   /* determine the number of messages to send, their lengths */
4058   /*---------------------------------------------------------*/
4059   len_s = merge->len_s;
4060 
4061   len          = 0; /* length of buf_si[] */
4062   merge->nsend = 0;
4063   for (proc=0; proc<size; proc++) {
4064     len_si[proc] = 0;
4065     if (proc == rank) {
4066       len_s[proc] = 0;
4067     } else {
4068       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4069       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4070     }
4071     if (len_s[proc]) {
4072       merge->nsend++;
4073       nrows = 0;
4074       for (i=owners[proc]; i<owners[proc+1]; i++) {
4075         if (ai[i+1] > ai[i]) nrows++;
4076       }
4077       len_si[proc] = 2*(nrows+1);
4078       len         += len_si[proc];
4079     }
4080   }
4081 
4082   /* determine the number and length of messages to receive for ij-structure */
4083   /*-------------------------------------------------------------------------*/
4084   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4085   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4086 
4087   /* post the Irecv of j-structure */
4088   /*-------------------------------*/
4089   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4090   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4091 
4092   /* post the Isend of j-structure */
4093   /*--------------------------------*/
4094   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4095 
4096   for (proc=0, k=0; proc<size; proc++) {
4097     if (!len_s[proc]) continue;
4098     i    = owners[proc];
4099     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4100     k++;
4101   }
4102 
4103   /* receives and sends of j-structure are complete */
4104   /*------------------------------------------------*/
4105   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4106   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4107 
4108   /* send and recv i-structure */
4109   /*---------------------------*/
4110   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4111   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4112 
4113   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4114   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4115   for (proc=0,k=0; proc<size; proc++) {
4116     if (!len_s[proc]) continue;
4117     /* form outgoing message for i-structure:
4118          buf_si[0]:                 nrows to be sent
4119                [1:nrows]:           row index (global)
4120                [nrows+1:2*nrows+1]: i-structure index
4121     */
4122     /*-------------------------------------------*/
4123     nrows       = len_si[proc]/2 - 1;
4124     buf_si_i    = buf_si + nrows+1;
4125     buf_si[0]   = nrows;
4126     buf_si_i[0] = 0;
4127     nrows       = 0;
4128     for (i=owners[proc]; i<owners[proc+1]; i++) {
4129       anzi = ai[i+1] - ai[i];
4130       if (anzi) {
4131         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4132         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4133         nrows++;
4134       }
4135     }
4136     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4137     k++;
4138     buf_si += len_si[proc];
4139   }
4140 
4141   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4142   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4143 
4144   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4145   for (i=0; i<merge->nrecv; i++) {
4146     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4147   }
4148 
4149   ierr = PetscFree(len_si);CHKERRQ(ierr);
4150   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4151   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4152   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4153   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4154   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4155   ierr = PetscFree(status);CHKERRQ(ierr);
4156 
4157   /* compute a local seq matrix in each processor */
4158   /*----------------------------------------------*/
4159   /* allocate bi array and free space for accumulating nonzero column info */
4160   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4161   bi[0] = 0;
4162 
4163   /* create and initialize a linked list */
4164   nlnk = N+1;
4165   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4166 
4167   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4168   len  = ai[owners[rank+1]] - ai[owners[rank]];
4169   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4170 
4171   current_space = free_space;
4172 
4173   /* determine symbolic info for each local row */
4174   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4175 
4176   for (k=0; k<merge->nrecv; k++) {
4177     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4178     nrows       = *buf_ri_k[k];
4179     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4180     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4181   }
4182 
4183   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4184   len  = 0;
4185   for (i=0; i<m; i++) {
4186     bnzi = 0;
4187     /* add local non-zero cols of this proc's seqmat into lnk */
4188     arow  = owners[rank] + i;
4189     anzi  = ai[arow+1] - ai[arow];
4190     aj    = a->j + ai[arow];
4191     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4192     bnzi += nlnk;
4193     /* add received col data into lnk */
4194     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4195       if (i == *nextrow[k]) { /* i-th row */
4196         anzi  = *(nextai[k]+1) - *nextai[k];
4197         aj    = buf_rj[k] + *nextai[k];
4198         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4199         bnzi += nlnk;
4200         nextrow[k]++; nextai[k]++;
4201       }
4202     }
4203     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4204 
4205     /* if free space is not available, make more free space */
4206     if (current_space->local_remaining<bnzi) {
4207       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4208       nspacedouble++;
4209     }
4210     /* copy data into free space, then initialize lnk */
4211     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4212     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4213 
4214     current_space->array           += bnzi;
4215     current_space->local_used      += bnzi;
4216     current_space->local_remaining -= bnzi;
4217 
4218     bi[i+1] = bi[i] + bnzi;
4219   }
4220 
4221   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4222 
4223   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4224   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4225   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4226 
4227   /* create symbolic parallel matrix B_mpi */
4228   /*---------------------------------------*/
4229   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4230   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4231   if (n==PETSC_DECIDE) {
4232     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4233   } else {
4234     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4235   }
4236   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4237   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4238   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4239   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4240   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4241 
4242   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4243   B_mpi->assembled    = PETSC_FALSE;
4244   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4245   merge->bi           = bi;
4246   merge->bj           = bj;
4247   merge->buf_ri       = buf_ri;
4248   merge->buf_rj       = buf_rj;
4249   merge->coi          = NULL;
4250   merge->coj          = NULL;
4251   merge->owners_co    = NULL;
4252 
4253   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4254 
4255   /* attach the supporting struct to B_mpi for reuse */
4256   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4257   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4258   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4259   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4260   *mpimat = B_mpi;
4261 
4262   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4263   PetscFunctionReturn(0);
4264 }
4265 
4266 #undef __FUNCT__
4267 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4268 /*@C
4269       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential
4270                  matrices from each processor
4271 
4272     Collective on MPI_Comm
4273 
4274    Input Parameters:
4275 +    comm - the communicator the parallel matrix will live on
4276 .    seqmat - the input sequential matrix on each processor
4277 .    m - number of local rows (or PETSC_DECIDE)
4278 .    n - number of local columns (or PETSC_DECIDE)
4279 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4280 
4281    Output Parameter:
4282 .    mpimat - the parallel matrix generated
4283 
4284     Level: advanced
4285 
4286    Notes:
4287      The dimensions of the sequential matrix in each processor MUST be the same.
4288      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4289      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
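
     Example usage (a minimal sketch; M, N, myrow and mycol below are placeholders assumed for
     illustration, they are not names used in this file).  Each process builds a sequential matrix
     with the same global dimensions and the per-process contributions are then summed:
.vb
     Mat         seqmat,mpimat;
     PetscScalar one = 1.0;
     MatCreateSeqAIJ(PETSC_COMM_SELF,M,N,1,NULL,&seqmat);
     MatSetValue(seqmat,myrow,mycol,one,ADD_VALUES);
     MatAssemblyBegin(seqmat,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(seqmat,MAT_FINAL_ASSEMBLY);
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
.ve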
4290 @*/
4291 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4292 {
4293   PetscErrorCode ierr;
4294   PetscMPIInt    size;
4295 
4296   PetscFunctionBegin;
4297   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4298   if (size == 1) {
4299     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4300     if (scall == MAT_INITIAL_MATRIX) {
4301       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4302     } else {
4303       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4304     }
4305     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4306     PetscFunctionReturn(0);
4307   }
4308   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4309   if (scall == MAT_INITIAL_MATRIX) {
4310     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4311   }
4312   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4313   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4314   PetscFunctionReturn(0);
4315 }
4316 
4317 #undef __FUNCT__
4318 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4319 /*@
4320      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4321           mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
4322           with MatGetSize()
4323 
4324     Not Collective
4325 
4326    Input Parameters:
4327 +    A - the matrix
4328 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4329 
4330    Output Parameter:
4331 .    A_loc - the local sequential matrix generated
4332 
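   Example usage (a minimal sketch, assuming A is an assembled MATMPIAIJ matrix; the local matrix
   can later be refreshed with MAT_REUSE_MATRIX after the values of A change and is destroyed by
   the caller):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
     MatDestroy(&A_loc);
.ve
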
4333     Level: developer
4334 
4335 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4336 
4337 @*/
4338 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4339 {
4340   PetscErrorCode ierr;
4341   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4342   Mat_SeqAIJ     *mat,*a,*b;
4343   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4344   MatScalar      *aa,*ba,*cam;
4345   PetscScalar    *ca;
4346   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4347   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4348   PetscBool      match;
4349   MPI_Comm       comm;
4350   PetscMPIInt    size;
4351 
4352   PetscFunctionBegin;
4353   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4354   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4355   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4356   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4357   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4358 
4359   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4360   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4361   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4362   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4363   aa = a->a; ba = b->a;
4364   if (scall == MAT_INITIAL_MATRIX) {
4365     if (size == 1) {
4366       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4367       PetscFunctionReturn(0);
4368     }
4369 
4370     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4371     ci[0] = 0;
4372     for (i=0; i<am; i++) {
4373       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4374     }
4375     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4376     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4377     k    = 0;
4378     for (i=0; i<am; i++) {
4379       ncols_o = bi[i+1] - bi[i];
4380       ncols_d = ai[i+1] - ai[i];
4381       /* off-diagonal portion of A */
4382       for (jo=0; jo<ncols_o; jo++) {
4383         col = cmap[*bj];
4384         if (col >= cstart) break;
4385         cj[k]   = col; bj++;
4386         ca[k++] = *ba++;
4387       }
4388       /* diagonal portion of A */
4389       for (j=0; j<ncols_d; j++) {
4390         cj[k]   = cstart + *aj++;
4391         ca[k++] = *aa++;
4392       }
4393       /* off-diagonal portion of A */
4394       for (j=jo; j<ncols_o; j++) {
4395         cj[k]   = cmap[*bj++];
4396         ca[k++] = *ba++;
4397       }
4398     }
4399     /* put together the new matrix */
4400     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4401     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4402     /* Since these are PETSc arrays, change flags to free them as necessary. */
4403     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4404     mat->free_a  = PETSC_TRUE;
4405     mat->free_ij = PETSC_TRUE;
4406     mat->nonew   = 0;
4407   } else if (scall == MAT_REUSE_MATRIX) {
4408     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4409     ci = mat->i; cj = mat->j; cam = mat->a;
4410     for (i=0; i<am; i++) {
4411       /* off-diagonal portion of A */
4412       ncols_o = bi[i+1] - bi[i];
4413       for (jo=0; jo<ncols_o; jo++) {
4414         col = cmap[*bj];
4415         if (col >= cstart) break;
4416         *cam++ = *ba++; bj++;
4417       }
4418       /* diagonal portion of A */
4419       ncols_d = ai[i+1] - ai[i];
4420       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4421       /* off-diagonal portion of A */
4422       for (j=jo; j<ncols_o; j++) {
4423         *cam++ = *ba++; bj++;
4424       }
4425     }
4426   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4427   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4428   PetscFunctionReturn(0);
4429 }
4430 
4431 #undef __FUNCT__
4432 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4433 /*@C
4434      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4435 
4436     Not Collective
4437 
4438    Input Parameters:
4439 +    A - the matrix
4440 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4441 -    row, col - index sets of rows and columns to extract (or NULL)
4442 
4443    Output Parameter:
4444 .    A_loc - the local sequential matrix generated
4445 
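   Example usage (a minimal sketch, assuming A is an assembled MATMPIAIJ matrix; passing NULL for
   row and col keeps all local rows and the nonzero columns of the local part):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
     MatDestroy(&A_loc);
.ve
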
4446     Level: developer
4447 
4448 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4449 
4450 @*/
4451 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4452 {
4453   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4454   PetscErrorCode ierr;
4455   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4456   IS             isrowa,iscola;
4457   Mat            *aloc;
4458   PetscBool      match;
4459 
4460   PetscFunctionBegin;
4461   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4462   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4463   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4464   if (!row) {
4465     start = A->rmap->rstart; end = A->rmap->rend;
4466     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4467   } else {
4468     isrowa = *row;
4469   }
4470   if (!col) {
4471     start = A->cmap->rstart;
4472     cmap  = a->garray;
4473     nzA   = a->A->cmap->n;
4474     nzB   = a->B->cmap->n;
4475     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4476     ncols = 0;
4477     for (i=0; i<nzB; i++) {
4478       if (cmap[i] < start) idx[ncols++] = cmap[i];
4479       else break;
4480     }
4481     imark = i;
4482     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4483     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4484     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4485   } else {
4486     iscola = *col;
4487   }
4488   if (scall != MAT_INITIAL_MATRIX) {
4489     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4490     aloc[0] = *A_loc;
4491   }
4492   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4493   *A_loc = aloc[0];
4494   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4495   if (!row) {
4496     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4497   }
4498   if (!col) {
4499     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4500   }
4501   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4502   PetscFunctionReturn(0);
4503 }
4504 
4505 #undef __FUNCT__
4506 #define __FUNCT__ "MatGetBrowsOfAcols"
4507 /*@C
4508     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4509 
4510     Collective on Mat
4511 
4512    Input Parameters:
4513 +    A,B - the matrices in mpiaij format
4514 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4515 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4516 
4517    Output Parameters:
4518 +    rowb, colb - index sets of rows and columns of B to extract
4519 -    B_seq - the sequential matrix generated
4520 
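   Example usage (a minimal sketch, assuming A and B are assembled MPIAIJ matrices with compatible
   layouts; the index sets created by the first call are reused by the second call after the values,
   but not the nonzero structure, of B have changed):
.vb
     IS  rowb = NULL,colb = NULL;
     Mat B_seq;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
     MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);
.ve
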
4521     Level: developer
4522 
4523 @*/
4524 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4525 {
4526   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4527   PetscErrorCode ierr;
4528   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4529   IS             isrowb,iscolb;
4530   Mat            *bseq=NULL;
4531 
4532   PetscFunctionBegin;
4533   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4534     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4535   }
4536   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4537 
4538   if (scall == MAT_INITIAL_MATRIX) {
4539     start = A->cmap->rstart;
4540     cmap  = a->garray;
4541     nzA   = a->A->cmap->n;
4542     nzB   = a->B->cmap->n;
4543     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4544     ncols = 0;
4545     for (i=0; i<nzB; i++) {  /* row < local row index */
4546       if (cmap[i] < start) idx[ncols++] = cmap[i];
4547       else break;
4548     }
4549     imark = i;
4550     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4551     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4552     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4553     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4554   } else {
4555     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4556     isrowb  = *rowb; iscolb = *colb;
4557     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4558     bseq[0] = *B_seq;
4559   }
4560   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4561   *B_seq = bseq[0];
4562   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4563   if (!rowb) {
4564     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4565   } else {
4566     *rowb = isrowb;
4567   }
4568   if (!colb) {
4569     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4570   } else {
4571     *colb = iscolb;
4572   }
4573   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4574   PetscFunctionReturn(0);
4575 }
4576 
4577 #undef __FUNCT__
4578 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4579 /*
4580     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4581     of the OFF-DIAGONAL portion of local A
4582 
4583     Collective on Mat
4584 
4585    Input Parameters:
4586 +    A,B - the matrices in mpiaij format
4587 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4588 
4589    Output Parameters:
4590 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4591 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4592 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4593 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4594 
4595     Level: developer
4596 
4597 */
4598 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4599 {
4600   VecScatter_MPI_General *gen_to,*gen_from;
4601   PetscErrorCode         ierr;
4602   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4603   Mat_SeqAIJ             *b_oth;
4604   VecScatter             ctx =a->Mvctx;
4605   MPI_Comm               comm;
4606   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4607   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4608   PetscScalar            *rvalues,*svalues;
4609   MatScalar              *b_otha,*bufa,*bufA;
4610   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4611   MPI_Request            *rwaits = NULL,*swaits = NULL;
4612   MPI_Status             *sstatus,rstatus;
4613   PetscMPIInt            jj,size;
4614   PetscInt               *cols,sbs,rbs;
4615   PetscScalar            *vals;
4616 
4617   PetscFunctionBegin;
4618   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4619   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4620 
4621   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4622     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4623   }
4624   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4625   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4626 
4627   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4628   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4629   rvalues  = gen_from->values; /* holds the length of receiving row */
4630   svalues  = gen_to->values;   /* holds the length of sending row */
4631   nrecvs   = gen_from->n;
4632   nsends   = gen_to->n;
4633 
4634   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4635   srow    = gen_to->indices;    /* local row index to be sent */
4636   sstarts = gen_to->starts;
4637   sprocs  = gen_to->procs;
4638   sstatus = gen_to->sstatus;
4639   sbs     = gen_to->bs;
4640   rstarts = gen_from->starts;
4641   rprocs  = gen_from->procs;
4642   rbs     = gen_from->bs;
4643 
4644   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4645   if (scall == MAT_INITIAL_MATRIX) {
4646     /* i-array */
4647     /*---------*/
4648     /*  post receives */
4649     for (i=0; i<nrecvs; i++) {
4650       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4651       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4652       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4653     }
4654 
4655     /* pack the outgoing message */
4656     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4657 
4658     sstartsj[0] = 0;
4659     rstartsj[0] = 0;
4660     len         = 0; /* total length of j or a array to be sent */
4661     k           = 0;
4662     for (i=0; i<nsends; i++) {
4663       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4664       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4665       for (j=0; j<nrows; j++) {
4666         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4667         for (l=0; l<sbs; l++) {
4668           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4669 
4670           rowlen[j*sbs+l] = ncols;
4671 
4672           len += ncols;
4673           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4674         }
4675         k++;
4676       }
4677       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4678 
4679       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4680     }
4681     /* recvs and sends of i-array are completed */
4682     i = nrecvs;
4683     while (i--) {
4684       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4685     }
4686     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4687 
4688     /* allocate buffers for sending j and a arrays */
4689     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4690     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4691 
4692     /* create i-array of B_oth */
4693     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4694 
4695     b_othi[0] = 0;
4696     len       = 0; /* total length of j or a array to be received */
4697     k         = 0;
4698     for (i=0; i<nrecvs; i++) {
4699       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4700       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4701       for (j=0; j<nrows; j++) {
4702         b_othi[k+1] = b_othi[k] + rowlen[j];
4703         len        += rowlen[j]; k++;
4704       }
4705       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4706     }
4707 
4708     /* allocate space for the j and a arrays of B_oth */
4709     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4710     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4711 
4712     /* j-array */
4713     /*---------*/
4714     /*  post receives of j-array */
4715     for (i=0; i<nrecvs; i++) {
4716       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4717       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4718     }
4719 
4720     /* pack the outgoing message j-array */
4721     k = 0;
4722     for (i=0; i<nsends; i++) {
4723       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4724       bufJ  = bufj+sstartsj[i];
4725       for (j=0; j<nrows; j++) {
4726         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4727         for (ll=0; ll<sbs; ll++) {
4728           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4729           for (l=0; l<ncols; l++) {
4730             *bufJ++ = cols[l];
4731           }
4732           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4733         }
4734       }
4735       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4736     }
4737 
4738     /* recvs and sends of j-array are completed */
4739     i = nrecvs;
4740     while (i--) {
4741       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4742     }
4743     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4744   } else if (scall == MAT_REUSE_MATRIX) {
4745     sstartsj = *startsj_s;
4746     rstartsj = *startsj_r;
4747     bufa     = *bufa_ptr;
4748     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4749     b_otha   = b_oth->a;
4750   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
4751 
4752   /* a-array */
4753   /*---------*/
4754   /*  post receives of a-array */
4755   for (i=0; i<nrecvs; i++) {
4756     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4757     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4758   }
4759 
4760   /* pack the outgoing message a-array */
4761   k = 0;
4762   for (i=0; i<nsends; i++) {
4763     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4764     bufA  = bufa+sstartsj[i];
4765     for (j=0; j<nrows; j++) {
4766       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4767       for (ll=0; ll<sbs; ll++) {
4768         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4769         for (l=0; l<ncols; l++) {
4770           *bufA++ = vals[l];
4771         }
4772         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4773       }
4774     }
4775     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4776   }
4777   /* recvs and sends of a-array are completed */
4778   i = nrecvs;
4779   while (i--) {
4780     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4781   }
4782   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4783   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4784 
4785   if (scall == MAT_INITIAL_MATRIX) {
4786     /* put together the new matrix */
4787     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4788 
4789     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4790     /* Since these are PETSc arrays, change flags to free them as necessary. */
4791     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4792     b_oth->free_a  = PETSC_TRUE;
4793     b_oth->free_ij = PETSC_TRUE;
4794     b_oth->nonew   = 0;
4795 
4796     ierr = PetscFree(bufj);CHKERRQ(ierr);
4797     if (!startsj_s || !bufa_ptr) {
4798       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4799       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4800     } else {
4801       *startsj_s = sstartsj;
4802       *startsj_r = rstartsj;
4803       *bufa_ptr  = bufa;
4804     }
4805   }
4806   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4807   PetscFunctionReturn(0);
4808 }
4809 
4810 #undef __FUNCT__
4811 #define __FUNCT__ "MatGetCommunicationStructs"
4812 /*@C
4813   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4814 
4815   Not Collective
4816 
4817   Input Parameter:
4818 . A - The matrix in mpiaij format
4819 
4820   Output Parameters:
4821 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4822 . colmap - A map from global column index to local index into lvec
4823 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4824 
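  Example usage (a minimal sketch, assuming A is an assembled MATMPIAIJ matrix; the type of the
  colmap argument depends on whether PETSc was configured with ctable support):
.vb
    Vec        lvec;
    VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
#else
    PetscInt   *colmap;
#endif
    MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);
.ve
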
4825   Level: developer
4826 
4827 @*/
4828 #if defined(PETSC_USE_CTABLE)
4829 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4830 #else
4831 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4832 #endif
4833 {
4834   Mat_MPIAIJ *a;
4835 
4836   PetscFunctionBegin;
4837   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4838   PetscValidPointer(lvec, 2);
4839   PetscValidPointer(colmap, 3);
4840   PetscValidPointer(multScatter, 4);
4841   a = (Mat_MPIAIJ*) A->data;
4842   if (lvec) *lvec = a->lvec;
4843   if (colmap) *colmap = a->colmap;
4844   if (multScatter) *multScatter = a->Mvctx;
4845   PetscFunctionReturn(0);
4846 }
4847 
4848 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4849 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4850 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4851 #if defined(PETSC_HAVE_ELEMENTAL)
4852 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4853 #endif
4854 
4855 #undef __FUNCT__
4856 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4857 /*
4858     Computes C = A*B as (B'*A')' since computing the MPIDense * MPIAIJ product directly is untenable
4859 
4860                n                       p                          p
4861         (              )       (              )         (                  )
4862       m (      A       )  *  n (       B      )   =   m (         C        )
4863         (              )       (              )         (                  )
4864 
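    i.e. C = A*B is obtained as C = (B^T * A^T)^T, so only an MPIAIJ * MPIDense product
    (plus two transposes) is needed instead of a direct MPIDense * MPIAIJ product.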
4865 */
4866 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4867 {
4868   PetscErrorCode ierr;
4869   Mat            At,Bt,Ct;
4870 
4871   PetscFunctionBegin;
4872   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4873   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4874   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4875   ierr = MatDestroy(&At);CHKERRQ(ierr);
4876   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4877   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4878   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4879   PetscFunctionReturn(0);
4880 }
4881 
4882 #undef __FUNCT__
4883 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4884 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4885 {
4886   PetscErrorCode ierr;
4887   PetscInt       m=A->rmap->n,n=B->cmap->n;
4888   Mat            Cmat;
4889 
4890   PetscFunctionBegin;
4891   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4892   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
4893   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4894   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
4895   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4896   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
4897   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4898   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4899 
4900   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
4901 
4902   *C = Cmat;
4903   PetscFunctionReturn(0);
4904 }
4905 
4906 /* ----------------------------------------------------------------*/
4907 #undef __FUNCT__
4908 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4909 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4910 {
4911   PetscErrorCode ierr;
4912 
4913   PetscFunctionBegin;
4914   if (scall == MAT_INITIAL_MATRIX) {
4915     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4916     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4917     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4918   }
4919   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4920   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4921   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4922   PetscFunctionReturn(0);
4923 }
4924 
4925 /*MC
4926    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4927 
4928    Options Database Keys:
4929 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4930 
4931   Level: beginner
4932 
4933 .seealso: MatCreateAIJ()
4934 M*/
4935 
4936 #undef __FUNCT__
4937 #define __FUNCT__ "MatCreate_MPIAIJ"
4938 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
4939 {
4940   Mat_MPIAIJ     *b;
4941   PetscErrorCode ierr;
4942   PetscMPIInt    size;
4943 
4944   PetscFunctionBegin;
4945   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
4946 
4947   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
4948   B->data       = (void*)b;
4949   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4950   B->assembled  = PETSC_FALSE;
4951   B->insertmode = NOT_SET_VALUES;
4952   b->size       = size;
4953 
4954   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
4955 
4956   /* build cache for off array entries formed */
4957   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
4958 
4959   b->donotstash  = PETSC_FALSE;
4960   b->colmap      = 0;
4961   b->garray      = 0;
4962   b->roworiented = PETSC_TRUE;
4963 
4964   /* stuff used for matrix vector multiply */
4965   b->lvec  = NULL;
4966   b->Mvctx = NULL;
4967 
4968   /* stuff for MatGetRow() */
4969   b->rowindices   = 0;
4970   b->rowvalues    = 0;
4971   b->getrowactive = PETSC_FALSE;
4972 
4973   /* flexible pointer used in CUSP/CUSPARSE classes */
4974   b->spptr = NULL;
4975 
4976   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4977   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4978   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4979   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4980   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4981   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4982   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4983   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
4984   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
4985   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
4986 #if defined(PETSC_HAVE_ELEMENTAL)
4987   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
4988 #endif
4989   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
4990   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
4991   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
4992   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4993   PetscFunctionReturn(0);
4994 }
4995 
4996 #undef __FUNCT__
4997 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4998 /*@C
4999      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5000          and "off-diagonal" part of the matrix in CSR format.
5001 
5002    Collective on MPI_Comm
5003 
5004    Input Parameters:
5005 +  comm - MPI communicator
5006 .  m - number of local rows (Cannot be PETSC_DECIDE)
5007 .  n - This value should be the same as the local size used in creating the
5008        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5009        calculated if N is given). For square matrices n is almost always m.
5010 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5011 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5012 .   i - row indices for "diagonal" portion of matrix
5013 .   j - column indices
5014 .   a - matrix values
5015 .   oi - row indices for "off-diagonal" portion of matrix
5016 .   oj - column indices
5017 -   oa - matrix values
5018 
5019    Output Parameter:
5020 .   mat - the matrix
5021 
5022    Level: advanced
5023 
5024    Notes:
5025        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5026        must free the arrays once the matrix has been destroyed and not before.
5027 
5028        The i and j indices are 0 based
5029 
5030        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5031 
5032        This sets local rows and cannot be used to set off-processor values.
5033 
5034        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5035        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5036        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5037        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5038        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5039        communication if it is known that only local entries will be set.
5040 
5041 .keywords: matrix, aij, compressed row, sparse, parallel
5042 
5043 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5044           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5045 @*/
5046 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5047 {
5048   PetscErrorCode ierr;
5049   Mat_MPIAIJ     *maij;
5050 
5051   PetscFunctionBegin;
5052   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5053   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5054   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5055   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5056   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5057   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5058   maij = (Mat_MPIAIJ*) (*mat)->data;
5059 
5060   (*mat)->preallocated = PETSC_TRUE;
5061 
5062   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5063   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5064 
5065   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5066   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5067 
5068   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5069   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5070   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5071   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5072 
5073   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5074   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5075   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5076   PetscFunctionReturn(0);
5077 }
5078 
5079 /*
5080     Special version for direct calls from Fortran
5081 */
5082 #include <petsc/private/fortranimpl.h>
5083 
5084 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5085 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5086 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5087 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5088 #endif
5089 
5090 /* Change these macros so can be used in void function */
5091 #undef CHKERRQ
5092 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5093 #undef SETERRQ2
5094 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5095 #undef SETERRQ3
5096 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5097 #undef SETERRQ
5098 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5099 
5100 #undef __FUNCT__
5101 #define __FUNCT__ "matsetvaluesmpiaij_"
5102 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5103 {
5104   Mat            mat  = *mmat;
5105   PetscInt       m    = *mm, n = *mn;
5106   InsertMode     addv = *maddv;
5107   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5108   PetscScalar    value;
5109   PetscErrorCode ierr;
5110 
5111   MatCheckPreallocated(mat,1);
5112   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5113 
5114 #if defined(PETSC_USE_DEBUG)
5115   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5116 #endif
5117   {
5118     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5119     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5120     PetscBool roworiented = aij->roworiented;
5121 
5122     /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5123     Mat        A                 = aij->A;
5124     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5125     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5126     MatScalar  *aa               = a->a;
5127     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5128     Mat        B                 = aij->B;
5129     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5130     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5131     MatScalar  *ba               = b->a;
5132 
5133     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5134     PetscInt  nonew = a->nonew;
5135     MatScalar *ap1,*ap2;
5136 
5137     PetscFunctionBegin;
5138     for (i=0; i<m; i++) {
5139       if (im[i] < 0) continue;
5140 #if defined(PETSC_USE_DEBUG)
5141       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5142 #endif
5143       if (im[i] >= rstart && im[i] < rend) {
5144         row      = im[i] - rstart;
5145         lastcol1 = -1;
5146         rp1      = aj + ai[row];
5147         ap1      = aa + ai[row];
5148         rmax1    = aimax[row];
5149         nrow1    = ailen[row];
5150         low1     = 0;
5151         high1    = nrow1;
5152         lastcol2 = -1;
5153         rp2      = bj + bi[row];
5154         ap2      = ba + bi[row];
5155         rmax2    = bimax[row];
5156         nrow2    = bilen[row];
5157         low2     = 0;
5158         high2    = nrow2;
5159 
5160         for (j=0; j<n; j++) {
5161           if (roworiented) value = v[i*n+j];
5162           else value = v[i+j*m];
5163           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5164           if (in[j] >= cstart && in[j] < cend) {
5165             col = in[j] - cstart;
5166             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5167           } else if (in[j] < 0) continue;
5168 #if defined(PETSC_USE_DEBUG)
5169           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5170 #endif
5171           else {
5172             if (mat->was_assembled) {
5173               if (!aij->colmap) {
5174                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5175               }
5176 #if defined(PETSC_USE_CTABLE)
5177               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5178               col--;
5179 #else
5180               col = aij->colmap[in[j]] - 1;
5181 #endif
5182               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5183                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5184                 col  =  in[j];
5185                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5186                 B     = aij->B;
5187                 b     = (Mat_SeqAIJ*)B->data;
5188                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5189                 ba    = b->a; /* refresh ba before using it: MatDisAssemble_MPIAIJ() replaced B and its value array */
5190                 rp2   = bj + bi[row];
5191                 ap2   = ba + bi[row];
5192                 rmax2 = bimax[row];
5193                 nrow2 = bilen[row];
5194                 low2  = 0;
5195                 high2 = nrow2;
5196                 bm    = aij->B->rmap->n;
5197               }
5198             } else col = in[j];
5199             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5200           }
5201         }
5202       } else if (!aij->donotstash) {
5203         if (roworiented) {
5204           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5205         } else {
5206           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5207         }
5208       }
5209     }
5210   }
5211   PetscFunctionReturnVoid();
5212 }
5213 
5214