xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a7fac7c2b47dbcd8ece0cc2dfdfe0e63be1bb7b5)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise.  As a result, for single process communicators,
13   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
14   for communicators controlling multiple processes.  It is recommended that you call both of
15   the above preallocation routines for simplicity; a usage sketch is given in the comment after this manual page.
16 
17    Options Database Keys:
18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19 
20   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the AIJ type also automatically switches over to using inodes when
21    enough of them exist.
22 
23   Level: beginner
24 
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
26 M*/
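/*
   A minimal usage sketch for MATAIJ (not code from this file; the sizes M, N and the
   nonzero estimates 5 and 2 per row are illustrative assumptions).  Calling both
   preallocation routines makes the same code correct on one or many processes:

       Mat A;
       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
       ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
       ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
       (... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() ...)
*/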
27 
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30 
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34   for communicators controlling multiple processes.  It is recommended that you call both of
35   the above preallocation routines for simplicity.
36 
37    Options Database Keys:
38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() (see the example following this manual page)
39 
40   Level: beginner
41 
42 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
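/*
   Runtime selection sketch (assumes an application that calls MatSetFromOptions()
   on its matrix; the executable name is illustrative):

       mpiexec -n 4 ./app -mat_type aijcrl
*/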
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
48 {
49   PetscErrorCode  ierr;
50   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
51   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
52   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
53   const PetscInt  *ia,*ib;
54   const MatScalar *aa,*bb;
55   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
56   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
57 
58   PetscFunctionBegin;
59   *keptrows = 0;
60   ia        = a->i;
61   ib        = b->i;
62   for (i=0; i<m; i++) {
63     na = ia[i+1] - ia[i];
64     nb = ib[i+1] - ib[i];
65     if (!na && !nb) {
66       cnt++;
67       goto ok1;
68     }
69     aa = a->a + ia[i];
70     for (j=0; j<na; j++) {
71       if (aa[j] != 0.0) goto ok1;
72     }
73     bb = b->a + ib[i];
74     for (j=0; j <nb; j++) {
75       if (bb[j] != 0.0) goto ok1;
76     }
77     cnt++;
78 ok1:;
79   }
80   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
81   if (!n0rows) PetscFunctionReturn(0);
82   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
83   cnt  = 0;
84   for (i=0; i<m; i++) {
85     na = ia[i+1] - ia[i];
86     nb = ib[i+1] - ib[i];
87     if (!na && !nb) continue;
88     aa = a->a + ia[i];
89     for (j=0; j<na;j++) {
90       if (aa[j] != 0.0) {
91         rows[cnt++] = rstart + i;
92         goto ok2;
93       }
94     }
95     bb = b->a + ib[i];
96     for (j=0; j<nb; j++) {
97       if (bb[j] != 0.0) {
98         rows[cnt++] = rstart + i;
99         goto ok2;
100       }
101     }
102 ok2:;
103   }
104   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
110 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
111 {
112   PetscErrorCode    ierr;
113   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
114 
115   PetscFunctionBegin;
116   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
117     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
118   } else {
119     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
128 {
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
130   PetscErrorCode ierr;
131   PetscInt       i,rstart,nrows,*rows;
132 
133   PetscFunctionBegin;
134   *zrows = NULL;
135   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
136   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
137   for (i=0; i<nrows; i++) rows[i] += rstart;
138   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
145 {
146   PetscErrorCode ierr;
147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
148   PetscInt       i,n,*garray = aij->garray;
149   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
150   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
151   PetscReal      *work;
152 
153   PetscFunctionBegin;
154   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
155   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
156   if (type == NORM_2) {
157     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
158       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
159     }
160     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
161       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
162     }
163   } else if (type == NORM_1) {
164     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
165       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
166     }
167     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
168       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
169     }
170   } else if (type == NORM_INFINITY) {
171     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
172       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
173     }
174     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
175       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
176     }
177 
178   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
179   if (type == NORM_INFINITY) {
180     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
181   } else {
182     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
183   }
184   ierr = PetscFree(work);CHKERRQ(ierr);
185   if (type == NORM_2) {
186     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
194 {
195   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
196   IS              sis,gis;
197   PetscErrorCode  ierr;
198   const PetscInt  *isis,*igis;
199   PetscInt        n,*iis,nsis,ngis,rstart,i;
200 
201   PetscFunctionBegin;
202   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
203   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
204   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
205   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
206   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
207   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
208 
209   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
210   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
211   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
212   n    = ngis + nsis;
213   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
214   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
215   for (i=0; i<n; i++) iis[i] += rstart;
216   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
217 
218   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
219   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
220   ierr = ISDestroy(&sis);CHKERRQ(ierr);
221   ierr = ISDestroy(&gis);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
225 #undef __FUNCT__
226 #define __FUNCT__ "MatDistribute_MPIAIJ"
227 /*
228     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
229     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
230 
231     Only for square matrices
232 
233     Used by a preconditioner, hence PETSC_EXTERN
234 */
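/*
   A hedged usage sketch (the names comm, gseq, mlocal, and dist are assumptions of
   the caller): process 0 supplies the global SeqAIJ matrix; after the initial
   distribution only the numerical values need to be pushed again.

       ierr = MatDistribute_MPIAIJ(comm,gseq,mlocal,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
       (... change numerical values of gseq on process 0, keeping the nonzero pattern ...)
       ierr = MatDistribute_MPIAIJ(comm,gseq,mlocal,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/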
235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
236 {
237   PetscMPIInt    rank,size;
238   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
239   PetscErrorCode ierr;
240   Mat            mat;
241   Mat_SeqAIJ     *gmata;
242   PetscMPIInt    tag;
243   MPI_Status     status;
244   PetscBool      aij;
245   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
246 
247   PetscFunctionBegin;
248   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
249   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
250   if (!rank) {
251     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
252     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
253   }
254   if (reuse == MAT_INITIAL_MATRIX) {
255     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
256     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
257     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
258     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
259     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
260     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
261     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
262     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
263     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
264 
265     rowners[0] = 0;
266     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
267     rstart = rowners[rank];
268     rend   = rowners[rank+1];
269     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
270     if (!rank) {
271       gmata = (Mat_SeqAIJ*) gmat->data;
272       /* send row lengths to all processors */
273       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
274       for (i=1; i<size; i++) {
275         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
276       }
277       /* determine the number of diagonal and off-diagonal entries in each row */
278       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
279       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
280       jj   = 0;
281       for (i=0; i<m; i++) {
282         for (j=0; j<dlens[i]; j++) {
283           if (gmata->j[jj] < rstart) ld[i]++;
284           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
285           jj++;
286         }
287       }
288       /* send column indices to other processes */
289       for (i=1; i<size; i++) {
290         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
291         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
292         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293       }
294 
295       /* send numerical values to other processes */
296       for (i=1; i<size; i++) {
297         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
298         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
299       }
300       gmataa = gmata->a;
301       gmataj = gmata->j;
302 
303     } else {
304       /* receive row lengths */
305       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
306       /* receive column indices */
307       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
308       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
309       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* determine the number of diagonal and off-diagonal entries in each row */
311       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
312       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
313       jj   = 0;
314       for (i=0; i<m; i++) {
315         for (j=0; j<dlens[i]; j++) {
316           if (gmataj[jj] < rstart) ld[i]++;
317           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
318           jj++;
319         }
320       }
321       /* receive numerical values */
322       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* set preallocation */
326     for (i=0; i<m; i++) {
327       dlens[i] -= olens[i];
328     }
329     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
330     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
331 
332     for (i=0; i<m; i++) {
333       dlens[i] += olens[i];
334     }
335     cnt = 0;
336     for (i=0; i<m; i++) {
337       row  = rstart + i;
338       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
339       cnt += dlens[i];
340     }
341     if (rank) {
342       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
343     }
344     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
345     ierr = PetscFree(rowners);CHKERRQ(ierr);
346 
347     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
348 
349     *inmat = mat;
350   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
351     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
352     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
353     mat  = *inmat;
354     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
355     if (!rank) {
356       /* send numerical values to other processes */
357       gmata  = (Mat_SeqAIJ*) gmat->data;
358       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
359       gmataa = gmata->a;
360       for (i=1; i<size; i++) {
361         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
362         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
363       }
364       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
365     } else {
366       /* receive numerical values from process 0*/
367       nz   = Ad->nz + Ao->nz;
368       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
369       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
370     }
371     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
372     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
373     ad = Ad->a;
374     ao = Ao->a;
375     if (mat->rmap->n) {
376       i  = 0;
377       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
378       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
379     }
380     for (i=1; i<mat->rmap->n; i++) {
381       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     i--;
385     if (mat->rmap->n) {
386       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
387     }
388     if (rank) {
389       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
390     }
391   }
392   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
393   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 /*
398   Local utility routine that creates a mapping from the global column
399 number to the local number in the off-diagonal part of the local
400 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
401 a slightly higher hash table cost; without it, it is not scalable (each process
402 holds an order-N integer array), although that array is fast to access.
403 */
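/*
   Illustrative example (assumed data): if the off-diagonal block references the
   global columns garray = {3,7,12}, then without PETSC_USE_CTABLE the array form
   stores colmap[3] = 1, colmap[7] = 2, colmap[12] = 3 (local index + 1, so that a
   value of 0 means "global column not present on this process"); with
   PETSC_USE_CTABLE the same pairs are kept in a hash table keyed by global column + 1.
*/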
404 #undef __FUNCT__
405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
407 {
408   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscInt       n = aij->B->cmap->n,i;
411 
412   PetscFunctionBegin;
413   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
414 #if defined(PETSC_USE_CTABLE)
415   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
416   for (i=0; i<n; i++) {
417     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
418   }
419 #else
420   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
421   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
422   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
423 #endif
424   PetscFunctionReturn(0);
425 }
426 
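/*
   The two macros below insert one (row,col,value) entry into the diagonal (A) and
   off-diagonal (B) SeqAIJ blocks respectively: a short binary search followed by a
   linear scan locates the column within the row; an existing entry is added to or
   overwritten, otherwise the tail of the row is shifted up (reallocating if needed)
   to make room.  nonew == 1 silently drops new nonzeros, nonew == -1 raises an
   error, and zero values are dropped when ignorezeroentries is set.
*/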
427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
428 { \
429     if (col <= lastcol1)  low1 = 0;     \
430     else                 high1 = nrow1; \
431     lastcol1 = col;\
432     while (high1-low1 > 5) { \
433       t = (low1+high1)/2; \
434       if (rp1[t] > col) high1 = t; \
435       else              low1  = t; \
436     } \
437       for (_i=low1; _i<high1; _i++) { \
438         if (rp1[_i] > col) break; \
439         if (rp1[_i] == col) { \
440           if (addv == ADD_VALUES) ap1[_i] += value;   \
441           else                    ap1[_i] = value; \
442           goto a_noinsert; \
443         } \
444       }  \
445       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
446       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
447       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
448       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
449       N = nrow1++ - 1; a->nz++; high1++; \
450       /* shift up all the later entries in this row */ \
451       for (ii=N; ii>=_i; ii--) { \
452         rp1[ii+1] = rp1[ii]; \
453         ap1[ii+1] = ap1[ii]; \
454       } \
455       rp1[_i] = col;  \
456       ap1[_i] = value;  \
457       A->nonzerostate++;\
458       a_noinsert: ; \
459       ailen[row] = nrow1; \
460 }
461 
462 
463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
464   { \
465     if (col <= lastcol2) low2 = 0;                        \
466     else high2 = nrow2;                                   \
467     lastcol2 = col;                                       \
468     while (high2-low2 > 5) {                              \
469       t = (low2+high2)/2;                                 \
470       if (rp2[t] > col) high2 = t;                        \
471       else             low2  = t;                         \
472     }                                                     \
473     for (_i=low2; _i<high2; _i++) {                       \
474       if (rp2[_i] > col) break;                           \
475       if (rp2[_i] == col) {                               \
476         if (addv == ADD_VALUES) ap2[_i] += value;         \
477         else                    ap2[_i] = value;          \
478         goto b_noinsert;                                  \
479       }                                                   \
480     }                                                     \
481     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
482     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
483     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
484     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
485     N = nrow2++ - 1; b->nz++; high2++;                    \
486     /* shift up all the later entries in this row */      \
487     for (ii=N; ii>=_i; ii--) {                            \
488       rp2[ii+1] = rp2[ii];                                \
489       ap2[ii+1] = ap2[ii];                                \
490     }                                                     \
491     rp2[_i] = col;                                        \
492     ap2[_i] = value;                                      \
493     B->nonzerostate++;                                    \
494     b_noinsert: ;                                         \
495     bilen[row] = nrow2;                                   \
496   }
497 
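/*
   MatSetValuesRow_MPIAIJ() expects v[] to contain one complete locally owned row in
   ascending global column order: the first l values lie to the left of the diagonal
   block and go into B, the next a->i[row+1]-a->i[row] values are the diagonal block
   and go into A, and the remaining values again belong to B.
*/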
498 #undef __FUNCT__
499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
501 {
502   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
503   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
504   PetscErrorCode ierr;
505   PetscInt       l,*garray = mat->garray,diag;
506 
507   PetscFunctionBegin;
508   /* code only works for square matrices A */
509 
510   /* find size of row to the left of the diagonal part */
511   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
512   row  = row - diag;
513   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
514     if (garray[b->j[b->i[row]+l]] > diag) break;
515   }
516   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* diagonal part */
519   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
520 
521   /* right of diagonal part */
522   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatSetValues_MPIAIJ"
528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
529 {
530   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
531   PetscScalar    value;
532   PetscErrorCode ierr;
533   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
534   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
535   PetscBool      roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat        A                 = aij->A;
539   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
540   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
541   MatScalar  *aa               = a->a;
542   PetscBool  ignorezeroentries = a->ignorezeroentries;
543   Mat        B                 = aij->B;
544   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
545   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
546   MatScalar  *ba               = b->a;
547 
548   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
549   PetscInt  nonew;
550   MatScalar *ap1,*ap2;
551 
552   PetscFunctionBegin;
553   for (i=0; i<m; i++) {
554     if (im[i] < 0) continue;
555 #if defined(PETSC_USE_DEBUG)
556     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
557 #endif
558     if (im[i] >= rstart && im[i] < rend) {
559       row      = im[i] - rstart;
560       lastcol1 = -1;
561       rp1      = aj + ai[row];
562       ap1      = aa + ai[row];
563       rmax1    = aimax[row];
564       nrow1    = ailen[row];
565       low1     = 0;
566       high1    = nrow1;
567       lastcol2 = -1;
568       rp2      = bj + bi[row];
569       ap2      = ba + bi[row];
570       rmax2    = bimax[row];
571       nrow2    = bilen[row];
572       low2     = 0;
573       high2    = nrow2;
574 
575       for (j=0; j<n; j++) {
576         if (roworiented) value = v[i*n+j];
577         else             value = v[i+j*m];
578         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
579         if (in[j] >= cstart && in[j] < cend) {
580           col   = in[j] - cstart;
581           nonew = a->nonew;
582           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
583         } else if (in[j] < 0) continue;
584 #if defined(PETSC_USE_DEBUG)
585         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
586 #endif
587         else {
588           if (mat->was_assembled) {
589             if (!aij->colmap) {
590               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
591             }
592 #if defined(PETSC_USE_CTABLE)
593             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
594             col--;
595 #else
596             col = aij->colmap[in[j]] - 1;
597 #endif
598             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
599               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
600               col  =  in[j];
601               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
602               B     = aij->B;
603               b     = (Mat_SeqAIJ*)B->data;
604               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
605               rp2   = bj + bi[row];
606               ap2   = ba + bi[row];
607               rmax2 = bimax[row];
608               nrow2 = bilen[row];
609               low2  = 0;
610               high2 = nrow2;
611               bm    = aij->B->rmap->n;
612               ba    = b->a;
613             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614           } else col = in[j];
615           nonew = b->nonew;
616           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
617         }
618       }
619     } else {
620       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
621       if (!aij->donotstash) {
622         mat->assembled = PETSC_FALSE;
623         if (roworiented) {
624           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
625         } else {
626           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
627         }
628       }
629     }
630   }
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatGetValues_MPIAIJ"
636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
641   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
642 
643   PetscFunctionBegin;
644   for (i=0; i<m; i++) {
645     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
646     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
647     if (idxm[i] >= rstart && idxm[i] < rend) {
648       row = idxm[i] - rstart;
649       for (j=0; j<n; j++) {
650         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
651         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
652         if (idxn[j] >= cstart && idxn[j] < cend) {
653           col  = idxn[j] - cstart;
654           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
655         } else {
656           if (!aij->colmap) {
657             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
658           }
659 #if defined(PETSC_USE_CTABLE)
660           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
661           col--;
662 #else
663           col = aij->colmap[idxn[j]] - 1;
664 #endif
665           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
666           else {
667             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
668           }
669         }
670       }
671     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
677 
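/*
   Entries destined for rows owned by other processes are stashed during
   MatSetValues_MPIAIJ(); MatAssemblyBegin_MPIAIJ() starts communicating the stash
   and MatAssemblyEnd_MPIAIJ() receives the messages, inserts the values, and then
   assembles the local A (diagonal) and B (off-diagonal) blocks.
*/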
678 #undef __FUNCT__
679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
681 {
682   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
683   PetscErrorCode ierr;
684   PetscInt       nstash,reallocs;
685   InsertMode     addv;
686 
687   PetscFunctionBegin;
688   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
689 
690   /* make sure all processors are either in INSERTMODE or ADDMODE */
691   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
692   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
693   mat->insertmode = addv; /* in case this processor had no cache */
694 
695   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
696   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
697   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
698   PetscFunctionReturn(0);
699 }
700 
701 #undef __FUNCT__
702 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
703 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
704 {
705   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
706   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
707   PetscErrorCode ierr;
708   PetscMPIInt    n;
709   PetscInt       i,j,rstart,ncols,flg;
710   PetscInt       *row,*col;
711   PetscBool      other_disassembled;
712   PetscScalar    *val;
713   InsertMode     addv = mat->insertmode;
714 
715   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
716 
717   PetscFunctionBegin;
718   if (!aij->donotstash && !mat->nooffprocentries) {
719     while (1) {
720       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
721       if (!flg) break;
722 
723       for (i=0; i<n; ) {
724         /* Now identify the consecutive vals belonging to the same row */
725         for (j=i,rstart=row[j]; j<n; j++) {
726           if (row[j] != rstart) break;
727         }
728         if (j < n) ncols = j-i;
729         else       ncols = n-i;
730         /* Now assemble all these values with a single function call */
731         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
732 
733         i = j;
734       }
735     }
736     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
737   }
738   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
739   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
740 
741   /* determine if any processor has disassembled, if so we must
742      also disassemble ourselves, in order that we may reassemble. */
743   /*
744      if nonzero structure of submatrix B cannot change then we know that
745      no processor disassembled thus we can skip this stuff
746   */
747   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
748     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
749     if (mat->was_assembled && !other_disassembled) {
750       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
751     }
752   }
753   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
754     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
755   }
756   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
757   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
758   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
759 
760   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
761 
762   aij->rowvalues = 0;
763 
764   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
765   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
766 
767   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
768   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
769     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
770     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
771   }
772   PetscFunctionReturn(0);
773 }
774 
775 #undef __FUNCT__
776 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
777 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
778 {
779   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
780   PetscErrorCode ierr;
781 
782   PetscFunctionBegin;
783   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
784   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
785   PetscFunctionReturn(0);
786 }
787 
788 #undef __FUNCT__
789 #define __FUNCT__ "MatZeroRows_MPIAIJ"
790 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
791 {
792   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
793   PetscInt      *owners = A->rmap->range;
794   PetscInt       n      = A->rmap->n;
795   PetscSF        sf;
796   PetscInt      *lrows;
797   PetscSFNode   *rrows;
798   PetscInt       r, p = 0, len = 0;
799   PetscErrorCode ierr;
800 
801   PetscFunctionBegin;
802   /* Create SF where leaves are input rows and roots are owned rows */
803   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
804   for (r = 0; r < n; ++r) lrows[r] = -1;
805   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
806   for (r = 0; r < N; ++r) {
807     const PetscInt idx   = rows[r];
808     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
809     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
810       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
811     }
812     if (A->nooffproczerorows) {
813       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
814       lrows[len++] = idx - owners[p];
815     } else {
816       rrows[r].rank = p;
817       rrows[r].index = rows[r] - owners[p];
818     }
819   }
820   if (!A->nooffproczerorows) {
821     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
822     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
823     /* Collect flags for rows to be zeroed */
824     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
825     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
826     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
827     /* Compress and put in row numbers */
828     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
829   }
830   /* fix right hand side if needed */
831   if (x && b) {
832     const PetscScalar *xx;
833     PetscScalar       *bb;
834 
835     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
836     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
837     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
838     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
839     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
840   }
841   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
842   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
843   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
844     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
845   } else if (diag != 0.0) {
846     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
847     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
848     for (r = 0; r < len; ++r) {
849       const PetscInt row = lrows[r] + A->rmap->rstart;
850       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
851     }
852     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
853     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
854   } else {
855     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
856   }
857   ierr = PetscFree(lrows);CHKERRQ(ierr);
858 
859   /* only change matrix nonzero state if pattern was allowed to be changed */
860   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
861     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
862     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
863   }
864   PetscFunctionReturn(0);
865 }
866 
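/*
   MatZeroRowsColumns_MPIAIJ() first maps the (possibly off-process) global row list
   to local rows with a PetscSF, zeroes the corresponding rows and columns of the
   diagonal block with MatZeroRowsColumns(), and then builds a ghosted 0/1 mask
   vector to locate which ghost columns of the off-diagonal block B were zeroed so
   that those entries can be removed (and, if x and b are given, folded into the
   right-hand side).
*/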
867 #undef __FUNCT__
868 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
869 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
870 {
871   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
872   PetscErrorCode    ierr;
873   PetscMPIInt       n = A->rmap->n;
874   PetscInt          i,j,r,m,p = 0,len = 0;
875   PetscInt          *lrows,*owners = A->rmap->range;
876   PetscSFNode       *rrows;
877   PetscSF           sf;
878   const PetscScalar *xx;
879   PetscScalar       *bb,*mask;
880   Vec               xmask,lmask;
881   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
882   const PetscInt    *aj, *ii,*ridx;
883   PetscScalar       *aa;
884 
885   PetscFunctionBegin;
886   /* Create SF where leaves are input rows and roots are owned rows */
887   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
888   for (r = 0; r < n; ++r) lrows[r] = -1;
889   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
890   for (r = 0; r < N; ++r) {
891     const PetscInt idx   = rows[r];
892     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
893     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
894       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
895     }
896     rrows[r].rank  = p;
897     rrows[r].index = rows[r] - owners[p];
898   }
899   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
900   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
901   /* Collect flags for rows to be zeroed */
902   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
903   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
904   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
905   /* Compress and put in row numbers */
906   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
907   /* zero diagonal part of matrix */
908   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
909   /* handle off diagonal part of matrix */
910   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
911   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
912   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
913   for (i=0; i<len; i++) bb[lrows[i]] = 1;
914   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
915   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
917   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
918   if (x) {
919     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
920     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
921     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
922     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
923   }
924   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
925   /* remove zeroed rows of off diagonal matrix */
926   ii = aij->i;
927   for (i=0; i<len; i++) {
928     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
929   }
930   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
931   if (aij->compressedrow.use) {
932     m    = aij->compressedrow.nrows;
933     ii   = aij->compressedrow.i;
934     ridx = aij->compressedrow.rindex;
935     for (i=0; i<m; i++) {
936       n  = ii[i+1] - ii[i];
937       aj = aij->j + ii[i];
938       aa = aij->a + ii[i];
939 
940       for (j=0; j<n; j++) {
941         if (PetscAbsScalar(mask[*aj])) {
942           if (b) bb[*ridx] -= *aa*xx[*aj];
943           *aa = 0.0;
944         }
945         aa++;
946         aj++;
947       }
948       ridx++;
949     }
950   } else { /* do not use compressed row format */
951     m = l->B->rmap->n;
952     for (i=0; i<m; i++) {
953       n  = ii[i+1] - ii[i];
954       aj = aij->j + ii[i];
955       aa = aij->a + ii[i];
956       for (j=0; j<n; j++) {
957         if (PetscAbsScalar(mask[*aj])) {
958           if (b) bb[i] -= *aa*xx[*aj];
959           *aa = 0.0;
960         }
961         aa++;
962         aj++;
963       }
964     }
965   }
966   if (x) {
967     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
968     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
969   }
970   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
971   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
972   ierr = PetscFree(lrows);CHKERRQ(ierr);
973 
974   /* only change matrix nonzero state if pattern was allowed to be changed */
975   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
976     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
977     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
978   }
979   PetscFunctionReturn(0);
980 }
981 
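/*
   MatMult_MPIAIJ() computes y = A*x_local + B*x_ghost, where A and B are the
   diagonal and off-diagonal SeqAIJ blocks: the scatter Mvctx gathers the needed
   off-process entries of x into lvec while the diagonal-block product runs,
   overlapping communication with computation.
*/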
982 #undef __FUNCT__
983 #define __FUNCT__ "MatMult_MPIAIJ"
984 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
985 {
986   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
987   PetscErrorCode ierr;
988   PetscInt       nt;
989 
990   PetscFunctionBegin;
991   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
992   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
993   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
994   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
995   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
996   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
997   PetscFunctionReturn(0);
998 }
999 
1000 #undef __FUNCT__
1001 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1002 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1003 {
1004   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1005   PetscErrorCode ierr;
1006 
1007   PetscFunctionBegin;
1008   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 #undef __FUNCT__
1013 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1014 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1015 {
1016   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1017   PetscErrorCode ierr;
1018 
1019   PetscFunctionBegin;
1020   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1021   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1022   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1023   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1029 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1030 {
1031   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1032   PetscErrorCode ierr;
1033   PetscBool      merged;
1034 
1035   PetscFunctionBegin;
1036   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1037   /* do nondiagonal part */
1038   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1039   if (!merged) {
1040     /* send it on its way */
1041     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1042     /* do local part */
1043     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1044     /* receive remote parts: note this assumes the values are not actually */
1045     /* added into yy until the next line */
1046     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1047   } else {
1048     /* do local part */
1049     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1050     /* send it on its way */
1051     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1052     /* values actually were received in the Begin() but we need to call this nop */
1053     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1054   }
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 #undef __FUNCT__
1059 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1060 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1061 {
1062   MPI_Comm       comm;
1063   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1064   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1065   IS             Me,Notme;
1066   PetscErrorCode ierr;
1067   PetscInt       M,N,first,last,*notme,i;
1068   PetscMPIInt    size;
1069 
1070   PetscFunctionBegin;
1071   /* Easy test: symmetric diagonal block */
1072   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1073   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1074   if (!*f) PetscFunctionReturn(0);
1075   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1076   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1077   if (size == 1) PetscFunctionReturn(0);
1078 
1079   /* Hard test: off-diagonal block. This takes a MatGetSubMatrices() call. */
1080   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1081   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1082   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1083   for (i=0; i<first; i++) notme[i] = i;
1084   for (i=last; i<M; i++) notme[i-last+first] = i;
1085   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1086   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1087   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1088   Aoff = Aoffs[0];
1089   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1090   Boff = Boffs[0];
1091   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1092   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1093   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1094   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1095   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1096   ierr = PetscFree(notme);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1102 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106 
1107   PetscFunctionBegin;
1108   /* do nondiagonal part */
1109   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1110   /* send it on its way */
1111   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1112   /* do local part */
1113   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1114   /* receive remote parts */
1115   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 /*
1120   This only works correctly for square matrices where the subblock A->A is the
1121    diagonal block
1122 */
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1125 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1126 {
1127   PetscErrorCode ierr;
1128   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1129 
1130   PetscFunctionBegin;
1131   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1132   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1133   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNCT__
1138 #define __FUNCT__ "MatScale_MPIAIJ"
1139 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1140 {
1141   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1142   PetscErrorCode ierr;
1143 
1144   PetscFunctionBegin;
1145   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1146   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatDestroy_MPIAIJ"
1152 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1153 {
1154   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1155   PetscErrorCode ierr;
1156 
1157   PetscFunctionBegin;
1158 #if defined(PETSC_USE_LOG)
1159   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1160 #endif
1161   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1162   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1163   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1164   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1165 #if defined(PETSC_USE_CTABLE)
1166   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1167 #else
1168   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1169 #endif
1170   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1171   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1172   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1173   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1174   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1175   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1176 
1177   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1179   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1181   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1182   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1186 #if defined(PETSC_HAVE_ELEMENTAL)
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1188 #endif
1189   PetscFunctionReturn(0);
1190 }
1191 
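/*
   MatView_MPIAIJ_Binary(): process 0 writes the PETSc binary header and then, using
   the viewer flow control to limit outstanding messages, streams the row lengths,
   the global column indices (with the diagonal block spliced between the left and
   right off-diagonal parts so each row is in ascending column order), and finally
   the numerical values received from every process.
*/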
1192 #undef __FUNCT__
1193 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1194 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1195 {
1196   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1197   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1198   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1199   PetscErrorCode ierr;
1200   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1201   int            fd;
1202   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1203   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1204   PetscScalar    *column_values;
1205   PetscInt       message_count,flowcontrolcount;
1206   FILE           *file;
1207 
1208   PetscFunctionBegin;
1209   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1210   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1211   nz   = A->nz + B->nz;
1212   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1213   if (!rank) {
1214     header[0] = MAT_FILE_CLASSID;
1215     header[1] = mat->rmap->N;
1216     header[2] = mat->cmap->N;
1217 
1218     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1219     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1220     /* get largest number of rows any processor has */
1221     rlen  = mat->rmap->n;
1222     range = mat->rmap->range;
1223     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1224   } else {
1225     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1226     rlen = mat->rmap->n;
1227   }
1228 
1229   /* load up the local row counts */
1230   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1231   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1232 
1233   /* store the row lengths to the file */
1234   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1235   if (!rank) {
1236     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1237     for (i=1; i<size; i++) {
1238       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1239       rlen = range[i+1] - range[i];
1240       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1241       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1242     }
1243     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1244   } else {
1245     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1246     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1247     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1248   }
1249   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1250 
1251   /* load up the local column indices */
1252   nzmax = nz; /* this processor needs as much space as the largest processor needs */
1253   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1254   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1255   cnt   = 0;
1256   for (i=0; i<mat->rmap->n; i++) {
1257     for (j=B->i[i]; j<B->i[i+1]; j++) {
1258       if ((col = garray[B->j[j]]) > cstart) break;
1259       column_indices[cnt++] = col;
1260     }
1261     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1262     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1263   }
1264   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1265 
1266   /* store the column indices to the file */
1267   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1268   if (!rank) {
1269     MPI_Status status;
1270     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1271     for (i=1; i<size; i++) {
1272       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1273       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1274       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1275       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1276       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1277     }
1278     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1279   } else {
1280     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1281     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1282     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1283     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1284   }
1285   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1286 
1287   /* load up the local column values */
1288   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1289   cnt  = 0;
1290   for (i=0; i<mat->rmap->n; i++) {
1291     for (j=B->i[i]; j<B->i[i+1]; j++) {
1292       if (garray[B->j[j]] > cstart) break;
1293       column_values[cnt++] = B->a[j];
1294     }
1295     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1296     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1297   }
1298   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1299 
1300   /* store the column values to the file */
1301   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1302   if (!rank) {
1303     MPI_Status status;
1304     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1305     for (i=1; i<size; i++) {
1306       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1307       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1308       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1309       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1310       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1311     }
1312     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1313   } else {
1314     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1315     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1316     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1317     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1318   }
1319   ierr = PetscFree(column_values);CHKERRQ(ierr);
1320 
1321   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1322   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1323   PetscFunctionReturn(0);
1324 }
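/*
   The file written above contains, in order, a four-entry header (MAT_FILE_CLASSID, global
   rows, global columns, global nonzero count), then the nonzero count of every row, then the
   global column indices of all nonzeros ordered by row, then the matching values.  Below is a
   minimal caller-side usage sketch; the helper name and file name are illustrative only.
*/
#if 0
static PetscErrorCode ExampleWriteMPIAIJBinary(Mat A)
{
  PetscViewer    viewer;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
  ierr = MatView(A,viewer);CHKERRQ(ierr); /* on more than one process this reaches MatView_MPIAIJ_Binary() */
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif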
1325 
1326 #include <petscdraw.h>
1327 #undef __FUNCT__
1328 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1329 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1330 {
1331   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1332   PetscErrorCode    ierr;
1333   PetscMPIInt       rank = aij->rank,size = aij->size;
1334   PetscBool         isdraw,iascii,isbinary;
1335   PetscViewer       sviewer;
1336   PetscViewerFormat format;
1337 
1338   PetscFunctionBegin;
1339   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1340   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1341   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1342   if (iascii) {
1343     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1344     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1345       MatInfo   info;
1346       PetscBool inodes;
1347 
1348       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1349       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1350       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1351       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1352       if (!inodes) {
1353         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1354                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1355       } else {
1356         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1357                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1358       }
1359       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1360       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1361       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1362       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1363       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1364       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1365       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1366       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1367       PetscFunctionReturn(0);
1368     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1369       PetscInt inodecount,inodelimit,*inodes;
1370       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1371       if (inodes) {
1372         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1373       } else {
1374         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1375       }
1376       PetscFunctionReturn(0);
1377     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1378       PetscFunctionReturn(0);
1379     }
1380   } else if (isbinary) {
1381     if (size == 1) {
1382       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1383       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1384     } else {
1385       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1386     }
1387     PetscFunctionReturn(0);
1388   } else if (isdraw) {
1389     PetscDraw draw;
1390     PetscBool isnull;
1391     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1392     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1393   }
1394 
1395   {
1396     /* assemble the entire matrix onto first processor. */
1397     Mat        A;
1398     Mat_SeqAIJ *Aloc;
1399     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1400     MatScalar  *a;
1401 
1402     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1403     if (!rank) {
1404       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1405     } else {
1406       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1407     }
1408     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1409     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1410     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1411     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1412     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1413 
1414     /* copy over the A part */
1415     Aloc = (Mat_SeqAIJ*)aij->A->data;
1416     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1417     row  = mat->rmap->rstart;
1418     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1419     for (i=0; i<m; i++) {
1420       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1421       row++;
1422       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1423     }
1424     aj = Aloc->j;
1425     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1426 
1427     /* copy over the B part */
1428     Aloc = (Mat_SeqAIJ*)aij->B->data;
1429     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1430     row  = mat->rmap->rstart;
1431     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1432     ct   = cols;
1433     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1434     for (i=0; i<m; i++) {
1435       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1436       row++;
1437       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1438     }
1439     ierr = PetscFree(ct);CHKERRQ(ierr);
1440     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1441     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1442     /*
1443        Every process must participate in the call that draws the matrix, since the
1444        graphics waits are synchronized across all processes that share the PetscDraw object
1445     */
1446     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1447     if (!rank) {
1448       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1449       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1450     }
1451     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1452     ierr = MatDestroy(&A);CHKERRQ(ierr);
1453   }
1454   PetscFunctionReturn(0);
1455 }
1456 
1457 #undef __FUNCT__
1458 #define __FUNCT__ "MatView_MPIAIJ"
1459 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1460 {
1461   PetscErrorCode ierr;
1462   PetscBool      iascii,isdraw,issocket,isbinary;
1463 
1464   PetscFunctionBegin;
1465   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1466   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1467   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1468   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1469   if (iascii || isdraw || isbinary || issocket) {
1470     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1471   }
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 #undef __FUNCT__
1476 #define __FUNCT__ "MatSOR_MPIAIJ"
1477 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1478 {
1479   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1480   PetscErrorCode ierr;
1481   Vec            bb1 = 0;
1482   PetscBool      hasop;
1483 
1484   PetscFunctionBegin;
1485   if (flag == SOR_APPLY_UPPER) {
1486     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1487     PetscFunctionReturn(0);
1488   }
1489 
1490   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { /* need a work vector unless this is a single zero-initial-guess sweep */
1491     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1492   }
1493 
1494   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1495     if (flag & SOR_ZERO_INITIAL_GUESS) {
1496       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1497       its--;
1498     }
1499 
1500     while (its--) {
1501       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1502       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1503 
1504       /* update rhs: bb1 = bb - B*x */
1505       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1506       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1507 
1508       /* local sweep */
1509       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1510     }
1511   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1512     if (flag & SOR_ZERO_INITIAL_GUESS) {
1513       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1514       its--;
1515     }
1516     while (its--) {
1517       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1518       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1519 
1520       /* update rhs: bb1 = bb - B*x */
1521       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1522       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1523 
1524       /* local sweep */
1525       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1526     }
1527   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1528     if (flag & SOR_ZERO_INITIAL_GUESS) {
1529       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1530       its--;
1531     }
1532     while (its--) {
1533       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1534       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1535 
1536       /* update rhs: bb1 = bb - B*x */
1537       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1538       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1539 
1540       /* local sweep */
1541       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1542     }
1543   } else if (flag & SOR_EISENSTAT) {
1544     Vec xx1;
1545 
1546     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1547     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1548 
1549     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1550     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1551     if (!mat->diag) {
1552       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1553       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1554     }
1555     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1556     if (hasop) {
1557       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1558     } else {
1559       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1560     }
1561     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1562 
1563     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1564 
1565     /* local sweep */
1566     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1567     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1568     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1569   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1570 
1571   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1572   PetscFunctionReturn(0);
1573 }
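/*
   Each parallel local sweep above gathers the ghost values x_ghost with the Mvctx scatter,
   forms the modified right-hand side bb1 = bb - B*x_ghost, and then applies the sequential SOR
   kernel of the diagonal block A to bb1.  The sketch below shows a direct caller-side use of
   the public MatSOR() interface; omega, fshift, and the iteration counts are illustrative
   choices only.
*/
#if 0
static PetscErrorCode ExampleLocalSymmetricSOR(Mat A,Vec b,Vec x)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,2,1,x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif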
1574 
1575 #undef __FUNCT__
1576 #define __FUNCT__ "MatPermute_MPIAIJ"
1577 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1578 {
1579   Mat            aA,aB,Aperm;
1580   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1581   PetscScalar    *aa,*ba;
1582   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1583   PetscSF        rowsf,sf;
1584   IS             parcolp = NULL;
1585   PetscBool      done;
1586   PetscErrorCode ierr;
1587 
1588   PetscFunctionBegin;
1589   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1590   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1591   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1592   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1593 
1594   /* Invert row permutation to find out where my rows should go */
1595   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1596   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1597   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1598   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1599   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1600   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1601 
1602   /* Invert column permutation to find out where my columns should go */
1603   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1604   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1605   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1606   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1607   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1608   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1609   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1610 
1611   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1612   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1613   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1614 
1615   /* Find out where my gcols should go */
1616   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1617   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1618   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1619   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1620   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1621   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1622   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1623   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1624 
1625   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1626   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1627   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1628   for (i=0; i<m; i++) {
1629     PetscInt row = rdest[i],rowner;
1630     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1631     for (j=ai[i]; j<ai[i+1]; j++) {
1632       PetscInt cowner,col = cdest[aj[j]];
1633       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1634       if (rowner == cowner) dnnz[i]++;
1635       else onnz[i]++;
1636     }
1637     for (j=bi[i]; j<bi[i+1]; j++) {
1638       PetscInt cowner,col = gcdest[bj[j]];
1639       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1640       if (rowner == cowner) dnnz[i]++;
1641       else onnz[i]++;
1642     }
1643   }
1644   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1645   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1646   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1647   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1648   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1649 
1650   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1651   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1652   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1653   for (i=0; i<m; i++) {
1654     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1655     PetscInt j0,rowlen;
1656     rowlen = ai[i+1] - ai[i];
1657     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could exceed the scratch arrays acols/bcols (length m), so insert in batches of at most m */
1658       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1659       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1660     }
1661     rowlen = bi[i+1] - bi[i];
1662     for (j0=j=0; j<rowlen; j0=j) {
1663       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1664       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1665     }
1666   }
1667   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1668   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1669   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1670   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1671   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1672   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1673   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1674   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1675   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1676   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1677   *B = Aperm;
1678   PetscFunctionReturn(0);
1679 }
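/*
   Usage sketch for the routine above (illustrative only; the helper name is arbitrary and the
   identity permutations built from the ownership ranges are a trivial choice used just to show
   the calling sequence of MatPermute()).
*/
#if 0
static PetscErrorCode ExamplePermuteIdentity(Mat A,Mat *B)
{
  IS             rowp,colp;
  PetscInt       rstart,rend,cstart,cend;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&rowp);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&colp);CHKERRQ(ierr);
  ierr = MatPermute(A,rowp,colp,B);CHKERRQ(ierr);
  ierr = ISDestroy(&rowp);CHKERRQ(ierr);
  ierr = ISDestroy(&colp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif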
1680 
1681 #undef __FUNCT__
1682 #define __FUNCT__ "MatGetGhosts_MPIAIJ"
1683 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1684 {
1685   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1686   PetscErrorCode ierr;
1687 
1688   PetscFunctionBegin;
1689   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1690   if (ghosts) *ghosts = aij->garray;
1691   PetscFunctionReturn(0);
1692 }
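/*
   Usage sketch (illustrative only; helper name and output format are arbitrary): querying the
   ghost (off-process) column indices that MatGetGhosts() reports, which for MPIAIJ are exactly
   the garray entries returned above.
*/
#if 0
static PetscErrorCode ExamplePrintGhosts(Mat A)
{
  PetscInt       nghosts,i;
  const PetscInt *ghosts;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetGhosts(A,&nghosts,&ghosts);CHKERRQ(ierr);
  for (i=0; i<nghosts; i++) {
    ierr = PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A),"ghost %D -> global column %D\n",i,ghosts[i]);CHKERRQ(ierr);
  }
  ierr = PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif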
1693 
1694 #undef __FUNCT__
1695 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1696 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1697 {
1698   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1699   Mat            A    = mat->A,B = mat->B;
1700   PetscErrorCode ierr;
1701   PetscReal      isend[5],irecv[5];
1702 
1703   PetscFunctionBegin;
1704   info->block_size = 1.0;
1705   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1706 
1707   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1708   isend[3] = info->memory;  isend[4] = info->mallocs;
1709 
1710   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1711 
1712   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1713   isend[3] += info->memory;  isend[4] += info->mallocs;
1714   if (flag == MAT_LOCAL) {
1715     info->nz_used      = isend[0];
1716     info->nz_allocated = isend[1];
1717     info->nz_unneeded  = isend[2];
1718     info->memory       = isend[3];
1719     info->mallocs      = isend[4];
1720   } else if (flag == MAT_GLOBAL_MAX) {
1721     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1722 
1723     info->nz_used      = irecv[0];
1724     info->nz_allocated = irecv[1];
1725     info->nz_unneeded  = irecv[2];
1726     info->memory       = irecv[3];
1727     info->mallocs      = irecv[4];
1728   } else if (flag == MAT_GLOBAL_SUM) {
1729     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1730 
1731     info->nz_used      = irecv[0];
1732     info->nz_allocated = irecv[1];
1733     info->nz_unneeded  = irecv[2];
1734     info->memory       = irecv[3];
1735     info->mallocs      = irecv[4];
1736   }
1737   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1738   info->fill_ratio_needed = 0;
1739   info->factor_mallocs    = 0;
1740   PetscFunctionReturn(0);
1741 }
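/*
   Usage sketch (illustrative only; helper name and print format are arbitrary): the statistics
   assembled above are aggregated across the whole communicator when MAT_GLOBAL_SUM or
   MAT_GLOBAL_MAX is requested.
*/
#if 0
static PetscErrorCode ExampleReportMatInfo(Mat A)
{
  MatInfo        info;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"nz used %g, nz allocated %g, mallocs %g\n",info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif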
1742 
1743 #undef __FUNCT__
1744 #define __FUNCT__ "MatSetOption_MPIAIJ"
1745 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1746 {
1747   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1748   PetscErrorCode ierr;
1749 
1750   PetscFunctionBegin;
1751   switch (op) {
1752   case MAT_NEW_NONZERO_LOCATIONS:
1753   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1754   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1755   case MAT_KEEP_NONZERO_PATTERN:
1756   case MAT_NEW_NONZERO_LOCATION_ERR:
1757   case MAT_USE_INODES:
1758   case MAT_IGNORE_ZERO_ENTRIES:
1759     MatCheckPreallocated(A,1);
1760     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1761     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1762     break;
1763   case MAT_ROW_ORIENTED:
1764     a->roworiented = flg;
1765 
1766     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1767     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1768     break;
1769   case MAT_NEW_DIAGONALS:
1770     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1771     break;
1772   case MAT_IGNORE_OFF_PROC_ENTRIES:
1773     a->donotstash = flg;
1774     break;
1775   case MAT_SPD:
1776     A->spd_set = PETSC_TRUE;
1777     A->spd     = flg;
1778     if (flg) {
1779       A->symmetric                  = PETSC_TRUE;
1780       A->structurally_symmetric     = PETSC_TRUE;
1781       A->symmetric_set              = PETSC_TRUE;
1782       A->structurally_symmetric_set = PETSC_TRUE;
1783     }
1784     break;
1785   case MAT_SYMMETRIC:
1786     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1787     break;
1788   case MAT_STRUCTURALLY_SYMMETRIC:
1789     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1790     break;
1791   case MAT_HERMITIAN:
1792     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1793     break;
1794   case MAT_SYMMETRY_ETERNAL:
1795     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1796     break;
1797   default:
1798     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1799   }
1800   PetscFunctionReturn(0);
1801 }
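/*
   Usage sketch (illustrative only; the particular options chosen are arbitrary): options that
   the switch above simply forwards to the diagonal and off-diagonal blocks, or records in the
   Mat_MPIAIJ struct, are set through the public MatSetOption() interface.
*/
#if 0
static PetscErrorCode ExampleSetAssemblyOptions(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); /* sets a->donotstash above */
  ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif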
1802 
1803 #undef __FUNCT__
1804 #define __FUNCT__ "MatGetRow_MPIAIJ"
1805 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1806 {
1807   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1808   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1809   PetscErrorCode ierr;
1810   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1811   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1812   PetscInt       *cmap,*idx_p;
1813 
1814   PetscFunctionBegin;
1815   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1816   mat->getrowactive = PETSC_TRUE;
1817 
1818   if (!mat->rowvalues && (idx || v)) {
1819     /*
1820         allocate enough space to hold information from the longest row.
1821     */
1822     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1823     PetscInt   max = 1,tmp;
1824     for (i=0; i<matin->rmap->n; i++) {
1825       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1826       if (max < tmp) max = tmp;
1827     }
1828     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1829   }
1830 
1831   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1832   lrow = row - rstart;
1833 
1834   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1835   if (!v)   {pvA = 0; pvB = 0;}
1836   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1837   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1838   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1839   nztot = nzA + nzB;
1840 
1841   cmap = mat->garray;
1842   if (v  || idx) {
1843     if (nztot) {
1844       /* Sort by increasing column numbers, assuming A and B already sorted */
1845       PetscInt imark = -1;
1846       if (v) {
1847         *v = v_p = mat->rowvalues;
1848         for (i=0; i<nzB; i++) {
1849           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1850           else break;
1851         }
1852         imark = i;
1853         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1854         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1855       }
1856       if (idx) {
1857         *idx = idx_p = mat->rowindices;
1858         if (imark > -1) {
1859           for (i=0; i<imark; i++) {
1860             idx_p[i] = cmap[cworkB[i]];
1861           }
1862         } else {
1863           for (i=0; i<nzB; i++) {
1864             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1865             else break;
1866           }
1867           imark = i;
1868         }
1869         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1870         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1871       }
1872     } else {
1873       if (idx) *idx = 0;
1874       if (v)   *v   = 0;
1875     }
1876   }
1877   *nz  = nztot;
1878   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1879   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1880   PetscFunctionReturn(0);
1881 }
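/*
   Usage sketch (illustrative only; helper name is arbitrary): iterating over the locally owned
   rows with the public MatGetRow()/MatRestoreRow() pair, which is what ultimately invokes
   MatGetRow_MPIAIJ() above and merges the A and B parts into one sorted row.
*/
#if 0
static PetscErrorCode ExampleSumLocalRows(Mat A,PetscScalar *sum)
{
  PetscInt          rstart,rend,row,ncols,j;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  *sum = 0.0;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    for (j=0; j<ncols; j++) *sum += vals[j];
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif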
1882 
1883 #undef __FUNCT__
1884 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1885 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1886 {
1887   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1888 
1889   PetscFunctionBegin;
1890   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1891   aij->getrowactive = PETSC_FALSE;
1892   PetscFunctionReturn(0);
1893 }
1894 
1895 #undef __FUNCT__
1896 #define __FUNCT__ "MatNorm_MPIAIJ"
1897 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1898 {
1899   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1900   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1901   PetscErrorCode ierr;
1902   PetscInt       i,j,cstart = mat->cmap->rstart;
1903   PetscReal      sum = 0.0;
1904   MatScalar      *v;
1905 
1906   PetscFunctionBegin;
1907   if (aij->size == 1) {
1908     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1909   } else {
1910     if (type == NORM_FROBENIUS) {
1911       v = amat->a;
1912       for (i=0; i<amat->nz; i++) {
1913         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1914       }
1915       v = bmat->a;
1916       for (i=0; i<bmat->nz; i++) {
1917         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1918       }
1919       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1920       *norm = PetscSqrtReal(*norm);
1921     } else if (type == NORM_1) { /* max column norm */
1922       PetscReal *tmp,*tmp2;
1923       PetscInt  *jj,*garray = aij->garray;
1924       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1925       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1926       *norm = 0.0;
1927       v     = amat->a; jj = amat->j;
1928       for (j=0; j<amat->nz; j++) {
1929         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1930       }
1931       v = bmat->a; jj = bmat->j;
1932       for (j=0; j<bmat->nz; j++) {
1933         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1934       }
1935       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1936       for (j=0; j<mat->cmap->N; j++) {
1937         if (tmp2[j] > *norm) *norm = tmp2[j];
1938       }
1939       ierr = PetscFree(tmp);CHKERRQ(ierr);
1940       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1941     } else if (type == NORM_INFINITY) { /* max row norm */
1942       PetscReal ntemp = 0.0;
1943       for (j=0; j<aij->A->rmap->n; j++) {
1944         v   = amat->a + amat->i[j];
1945         sum = 0.0;
1946         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1947           sum += PetscAbsScalar(*v); v++;
1948         }
1949         v = bmat->a + bmat->i[j];
1950         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1951           sum += PetscAbsScalar(*v); v++;
1952         }
1953         if (sum > ntemp) ntemp = sum;
1954       }
1955       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1956     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1957   }
1958   PetscFunctionReturn(0);
1959 }
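/*
   Usage sketch (illustrative only; helper name and print format are arbitrary): the three norm
   types handled above are requested through MatNorm(); NORM_2 is the unsupported case that
   triggers the SETERRQ above.
*/
#if 0
static PetscErrorCode ExampleMatNorms(Mat A)
{
  PetscReal      fro,one,inf;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatNorm(A,NORM_FROBENIUS,&fro);CHKERRQ(ierr);
  ierr = MatNorm(A,NORM_1,&one);CHKERRQ(ierr);        /* max column sum */
  ierr = MatNorm(A,NORM_INFINITY,&inf);CHKERRQ(ierr); /* max row sum */
  ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"||A||_F %g  ||A||_1 %g  ||A||_inf %g\n",(double)fro,(double)one,(double)inf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif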
1960 
1961 #undef __FUNCT__
1962 #define __FUNCT__ "MatTranspose_MPIAIJ"
1963 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1964 {
1965   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1966   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1967   PetscErrorCode ierr;
1968   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1969   PetscInt       cstart = A->cmap->rstart,ncol;
1970   Mat            B;
1971   MatScalar      *array;
1972 
1973   PetscFunctionBegin;
1974   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1975 
1976   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1977   ai = Aloc->i; aj = Aloc->j;
1978   bi = Bloc->i; bj = Bloc->j;
1979   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1980     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1981     PetscSFNode          *oloc;
1982     PETSC_UNUSED PetscSF sf;
1983 
1984     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1985     /* compute d_nnz for preallocation */
1986     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1987     for (i=0; i<ai[ma]; i++) {
1988       d_nnz[aj[i]]++;
1989       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1990     }
1991     /* compute local off-diagonal contributions */
1992     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1993     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1994     /* map those to global */
1995     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1996     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1997     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1998     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1999     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2000     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2001     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2002 
2003     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2004     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2005     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2006     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2007     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2008     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2009   } else {
2010     B    = *matout;
2011     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2012     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2013   }
2014 
2015   /* copy over the A part */
2016   array = Aloc->a;
2017   row   = A->rmap->rstart;
2018   for (i=0; i<ma; i++) {
2019     ncol = ai[i+1]-ai[i];
2020     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2021     row++;
2022     array += ncol; aj += ncol;
2023   }
2024   aj = Aloc->j;
2025   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
2026 
2027   /* copy over the B part */
2028   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2029   array = Bloc->a;
2030   row   = A->rmap->rstart;
2031   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2032   cols_tmp = cols;
2033   for (i=0; i<mb; i++) {
2034     ncol = bi[i+1]-bi[i];
2035     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2036     row++;
2037     array += ncol; cols_tmp += ncol;
2038   }
2039   ierr = PetscFree(cols);CHKERRQ(ierr);
2040 
2041   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2042   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2043   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2044     *matout = B;
2045   } else {
2046     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2047   }
2048   PetscFunctionReturn(0);
2049 }
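/*
   Usage sketch (illustrative only; helper name is arbitrary): out-of-place and in-place
   transposition through the public interface that dispatches to MatTranspose_MPIAIJ() above.
*/
#if 0
static PetscErrorCode ExampleTranspose(Mat A,Mat *At)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,At);CHKERRQ(ierr); /* new matrix holding A^T */
  ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);   /* in-place; only for square matrices, see the check above */
  PetscFunctionReturn(0);
}
#endif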
2050 
2051 #undef __FUNCT__
2052 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2053 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2054 {
2055   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2056   Mat            a    = aij->A,b = aij->B;
2057   PetscErrorCode ierr;
2058   PetscInt       s1,s2,s3;
2059 
2060   PetscFunctionBegin;
2061   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2062   if (rr) {
2063     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2064     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2065     /* Overlap communication with computation. */
2066     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2067   }
2068   if (ll) {
2069     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2070     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2071     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2072   }
2073   /* scale the diagonal block */
2074   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2075 
2076   if (rr) {
2077     /* Do a scatter end and then right scale the off-diagonal block */
2078     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2079     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2080   }
2081   PetscFunctionReturn(0);
2082 }
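/*
   Usage sketch (illustrative only; helper name and scaling values are arbitrary): diagonal
   scaling L*A*R with vectors that conform to the row and column layouts, as checked above.
*/
#if 0
static PetscErrorCode ExampleDiagonalScale(Mat A)
{
  Vec            l,r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreateVecs(A,&r,&l);CHKERRQ(ierr); /* r conforms to the columns, l to the rows */
  ierr = VecSet(l,2.0);CHKERRQ(ierr);
  ierr = VecSet(r,0.5);CHKERRQ(ierr);
  ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
  ierr = VecDestroy(&l);CHKERRQ(ierr);
  ierr = VecDestroy(&r);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif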
2083 
2084 #undef __FUNCT__
2085 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2086 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2087 {
2088   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2089   PetscErrorCode ierr;
2090 
2091   PetscFunctionBegin;
2092   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2093   PetscFunctionReturn(0);
2094 }
2095 
2096 #undef __FUNCT__
2097 #define __FUNCT__ "MatEqual_MPIAIJ"
2098 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2099 {
2100   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2101   Mat            a,b,c,d;
2102   PetscBool      flg;
2103   PetscErrorCode ierr;
2104 
2105   PetscFunctionBegin;
2106   a = matA->A; b = matA->B;
2107   c = matB->A; d = matB->B;
2108 
2109   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2110   if (flg) {
2111     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2112   }
2113   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2114   PetscFunctionReturn(0);
2115 }
2116 
2117 #undef __FUNCT__
2118 #define __FUNCT__ "MatCopy_MPIAIJ"
2119 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2120 {
2121   PetscErrorCode ierr;
2122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2123   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2124 
2125   PetscFunctionBegin;
2126   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2127   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2128     /* Because of the column compression in the off-process part of the matrix a->B,
2129        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
2130        directly on the two parts. If needed, a copy more efficient than MatCopy_Basic()
2131        could be provided by first uncompressing the a->B matrices and then copying the
2132        submatrices */
2133     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2134   } else {
2135     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2136     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2137   }
2138   PetscFunctionReturn(0);
2139 }
2140 
2141 #undef __FUNCT__
2142 #define __FUNCT__ "MatSetUp_MPIAIJ"
2143 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2144 {
2145   PetscErrorCode ierr;
2146 
2147   PetscFunctionBegin;
2148   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2149   PetscFunctionReturn(0);
2150 }
2151 
2152 /*
2153    Computes the number of nonzeros per row needed for preallocation when X and Y
2154    have different nonzero structure.
2155 */
2156 #undef __FUNCT__
2157 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2158 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2159 {
2160   PetscInt       i,j,k,nzx,nzy;
2161 
2162   PetscFunctionBegin;
2163   /* Set the number of nonzeros in the new matrix */
2164   for (i=0; i<m; i++) {
2165     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2166     nzx = xi[i+1] - xi[i];
2167     nzy = yi[i+1] - yi[i];
2168     nnz[i] = 0;
2169     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2170       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2171       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2172       nnz[i]++;
2173     }
2174     for (; k<nzy; k++) nnz[i]++;
2175   }
2176   PetscFunctionReturn(0);
2177 }
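/*
   Worked example of the merge above: for a row whose X columns map to global indices {0,3,7}
   and whose Y columns map to {1,3,8}, the loop counts 0, catches up past 1, skips the
   duplicate 3, counts 3 and 7, and finally counts the leftover 8, giving nnz = 5, the size of
   the union {0,1,3,7,8} of the two sorted column lists.
*/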
2178 
2179 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2180 #undef __FUNCT__
2181 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2182 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2183 {
2184   PetscErrorCode ierr;
2185   PetscInt       m = Y->rmap->N;
2186   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2187   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2188 
2189   PetscFunctionBegin;
2190   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2191   PetscFunctionReturn(0);
2192 }
2193 
2194 #undef __FUNCT__
2195 #define __FUNCT__ "MatAXPY_MPIAIJ"
2196 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2197 {
2198   PetscErrorCode ierr;
2199   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2200   PetscBLASInt   bnz,one=1;
2201   Mat_SeqAIJ     *x,*y;
2202 
2203   PetscFunctionBegin;
2204   if (str == SAME_NONZERO_PATTERN) {
2205     PetscScalar alpha = a;
2206     x    = (Mat_SeqAIJ*)xx->A->data;
2207     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2208     y    = (Mat_SeqAIJ*)yy->A->data;
2209     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2210     x    = (Mat_SeqAIJ*)xx->B->data;
2211     y    = (Mat_SeqAIJ*)yy->B->data;
2212     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2213     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2214     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2215   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */
2216     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2217   } else {
2218     Mat      B;
2219     PetscInt *nnz_d,*nnz_o;
2220     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2221     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2222     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2223     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2224     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2225     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2226     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2227     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2228     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2229     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2230     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2231     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2232     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2233     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2234   }
2235   PetscFunctionReturn(0);
2236 }
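/*
   Usage sketch (illustrative only; helper name and scalar are arbitrary): Y <- Y + a*X through
   the public interface; DIFFERENT_NONZERO_PATTERN exercises the preallocation path above.
*/
#if 0
static PetscErrorCode ExampleAXPY(Mat Y,Mat X)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif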
2237 
2238 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2239 
2240 #undef __FUNCT__
2241 #define __FUNCT__ "MatConjugate_MPIAIJ"
2242 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2243 {
2244 #if defined(PETSC_USE_COMPLEX)
2245   PetscErrorCode ierr;
2246   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2247 
2248   PetscFunctionBegin;
2249   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2250   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2251 #else
2252   PetscFunctionBegin;
2253 #endif
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatRealPart_MPIAIJ"
2259 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2260 {
2261   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2262   PetscErrorCode ierr;
2263 
2264   PetscFunctionBegin;
2265   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2266   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2267   PetscFunctionReturn(0);
2268 }
2269 
2270 #undef __FUNCT__
2271 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2272 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2273 {
2274   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2275   PetscErrorCode ierr;
2276 
2277   PetscFunctionBegin;
2278   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2279   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2280   PetscFunctionReturn(0);
2281 }
2282 
2283 #if defined(PETSC_HAVE_PBGL)
2284 
2285 #include <boost/parallel/mpi/bsp_process_group.hpp>
2286 #include <boost/graph/distributed/ilu_default_graph.hpp>
2287 #include <boost/graph/distributed/ilu_0_block.hpp>
2288 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2289 #include <boost/graph/distributed/petsc/interface.hpp>
2290 #include <boost/multi_array.hpp>
2291 #include <boost/parallel/distributed_property_map.hpp>
2292 
2293 #undef __FUNCT__
2294 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2295 /*
2296   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2297 */
2298 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2299 {
2300   namespace petsc = boost::distributed::petsc;
2301 
2302   namespace graph_dist = boost::graph::distributed;
2303   using boost::graph::distributed::ilu_default::process_group_type;
2304   using boost::graph::ilu_permuted;
2305 
2306   PetscBool      row_identity, col_identity;
2307   PetscContainer c;
2308   PetscInt       m, n, M, N;
2309   PetscErrorCode ierr;
2310 
2311   PetscFunctionBegin;
2312   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2313   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2314   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2315   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2316 
2317   process_group_type pg;
2318   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2319   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2320   lgraph_type& level_graph = *lgraph_p;
2321   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2322 
2323   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2324   ilu_permuted(level_graph);
2325 
2326   /* put together the new matrix */
2327   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2328   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2329   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2330   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2331   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2332   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2333   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2334   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2335 
2336   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2337   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2338   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2339   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2340   PetscFunctionReturn(0);
2341 }
2342 
2343 #undef __FUNCT__
2344 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2345 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2346 {
2347   PetscFunctionBegin;
2348   PetscFunctionReturn(0);
2349 }
2350 
2351 #undef __FUNCT__
2352 #define __FUNCT__ "MatSolve_MPIAIJ"
2353 /*
2354   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2355 */
2356 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2357 {
2358   namespace graph_dist = boost::graph::distributed;
2359 
2360   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2361   lgraph_type    *lgraph_p;
2362   PetscContainer c;
2363   PetscErrorCode ierr;
2364 
2365   PetscFunctionBegin;
2366   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2367   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2368   ierr = VecCopy(b, x);CHKERRQ(ierr);
2369 
2370   PetscScalar *array_x;
2371   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2372   PetscInt sx;
2373   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2374 
2375   PetscScalar *array_b;
2376   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2377   PetscInt sb;
2378   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2379 
2380   lgraph_type& level_graph = *lgraph_p;
2381   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2382 
2383   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2384   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2385   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2386 
2387   typedef boost::iterator_property_map<array_ref_type::iterator,
2388                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2389   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2390   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2391 
2392   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2393   PetscFunctionReturn(0);
2394 }
2395 #endif
2396 
2397 #undef __FUNCT__
2398 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2399 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2400 {
2401   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2402   PetscErrorCode ierr;
2403   PetscInt       i,*idxb = 0;
2404   PetscScalar    *va,*vb;
2405   Vec            vtmp;
2406 
2407   PetscFunctionBegin;
2408   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2409   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2410   if (idx) {
2411     for (i=0; i<A->rmap->n; i++) {
2412       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2413     }
2414   }
2415 
2416   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2417   if (idx) {
2418     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2419   }
2420   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2421   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2422 
2423   for (i=0; i<A->rmap->n; i++) {
2424     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2425       va[i] = vb[i];
2426       if (idx) idx[i] = a->garray[idxb[i]];
2427     }
2428   }
2429 
2430   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2431   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2432   ierr = PetscFree(idxb);CHKERRQ(ierr);
2433   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2434   PetscFunctionReturn(0);
2435 }
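/*
   Usage sketch (illustrative only; helper name is arbitrary): obtaining the largest absolute
   entry of every locally owned row together with its global column index, which is what the
   routine above assembles from the diagonal and off-diagonal blocks.
*/
#if 0
static PetscErrorCode ExampleRowMaxAbs(Mat A)
{
  Vec            v;
  PetscInt       m,*idx;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreateVecs(A,NULL,&v);CHKERRQ(ierr); /* vector conforming to the row layout */
  ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
  ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);
  ierr = PetscFree(idx);CHKERRQ(ierr);
  ierr = VecDestroy(&v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif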
2436 
2437 #undef __FUNCT__
2438 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2439 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2440 {
2441   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2442   PetscErrorCode ierr;
2443   PetscInt       i,*idxb = 0;
2444   PetscScalar    *va,*vb;
2445   Vec            vtmp;
2446 
2447   PetscFunctionBegin;
2448   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2449   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2450   if (idx) {
2451     for (i=0; i<A->rmap->n; i++) {
2452       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2453     }
2454   }
2455 
2456   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2457   if (idx) {
2458     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2459   }
2460   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2461   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2462 
2463   for (i=0; i<A->rmap->n; i++) {
2464     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2465       va[i] = vb[i];
2466       if (idx) idx[i] = a->garray[idxb[i]];
2467     }
2468   }
2469 
2470   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2471   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2472   ierr = PetscFree(idxb);CHKERRQ(ierr);
2473   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2474   PetscFunctionReturn(0);
2475 }
2476 
2477 #undef __FUNCT__
2478 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2479 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2480 {
2481   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2482   PetscInt       n      = A->rmap->n;
2483   PetscInt       cstart = A->cmap->rstart;
2484   PetscInt       *cmap  = mat->garray;
2485   PetscInt       *diagIdx, *offdiagIdx;
2486   Vec            diagV, offdiagV;
2487   PetscScalar    *a, *diagA, *offdiagA;
2488   PetscInt       r;
2489   PetscErrorCode ierr;
2490 
2491   PetscFunctionBegin;
2492   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2493   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2494   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2495   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2496   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2497   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2498   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2499   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2500   for (r = 0; r < n; ++r) {
2501     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2502       a[r]   = diagA[r];
2503       idx[r] = cstart + diagIdx[r];
2504     } else {
2505       a[r]   = offdiagA[r];
2506       idx[r] = cmap[offdiagIdx[r]];
2507     }
2508   }
2509   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2510   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2511   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2512   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2513   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2514   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2515   PetscFunctionReturn(0);
2516 }
2517 
2518 #undef __FUNCT__
2519 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2520 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2521 {
2522   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2523   PetscInt       n      = A->rmap->n;
2524   PetscInt       cstart = A->cmap->rstart;
2525   PetscInt       *cmap  = mat->garray;
2526   PetscInt       *diagIdx, *offdiagIdx;
2527   Vec            diagV, offdiagV;
2528   PetscScalar    *a, *diagA, *offdiagA;
2529   PetscInt       r;
2530   PetscErrorCode ierr;
2531 
2532   PetscFunctionBegin;
2533   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2534   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2535   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2536   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2537   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2538   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2539   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2540   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2541   for (r = 0; r < n; ++r) {
2542     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2543       a[r]   = diagA[r];
2544       idx[r] = cstart + diagIdx[r];
2545     } else {
2546       a[r]   = offdiagA[r];
2547       idx[r] = cmap[offdiagIdx[r]];
2548     }
2549   }
2550   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2551   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2552   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2553   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2554   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2555   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2556   PetscFunctionReturn(0);
2557 }
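
/*
   Usage sketch (illustrative only, not part of this source): computing the entry of
   largest magnitude in each row of an assembled MPIAIJ matrix A together with its
   global column index.  Error handling (ierr/CHKERRQ) follows the conventions used
   throughout this file.

     Vec      rowmax;
     PetscInt *idx,nlocal;
     ierr = MatGetLocalSize(A,&nlocal,NULL);CHKERRQ(ierr);
     ierr = PetscMalloc1(nlocal,&idx);CHKERRQ(ierr);
     ierr = MatCreateVecs(A,NULL,&rowmax);CHKERRQ(ierr);   // vector with the row layout of A
     ierr = MatGetRowMax(A,rowmax,idx);CHKERRQ(ierr);
     ...
     ierr = VecDestroy(&rowmax);CHKERRQ(ierr);
     ierr = PetscFree(idx);CHKERRQ(ierr);
*/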
2558 
2559 #undef __FUNCT__
2560 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2561 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2562 {
2563   PetscErrorCode ierr;
2564   Mat            *dummy;
2565 
2566   PetscFunctionBegin;
2567   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2568   *newmat = *dummy;
2569   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2570   PetscFunctionReturn(0);
2571 }
2572 
2573 #undef __FUNCT__
2574 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2575 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2576 {
2577   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2578   PetscErrorCode ierr;
2579 
2580   PetscFunctionBegin;
2581   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2582   PetscFunctionReturn(0);
2583 }
2584 
2585 #undef __FUNCT__
2586 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2587 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2588 {
2589   PetscErrorCode ierr;
2590   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2591 
2592   PetscFunctionBegin;
2593   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2594   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2595   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2596   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2597   PetscFunctionReturn(0);
2598 }
2599 
2600 #undef __FUNCT__
2601 #define __FUNCT__ "MatShift_MPIAIJ"
2602 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2603 {
2604   PetscErrorCode ierr;
2605   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2606   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2607 
2608   PetscFunctionBegin;
2609   if (!Y->preallocated) {
2610     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2611   } else if (!aij->nz) {
2612     PetscInt nonew = aij->nonew;
2613     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2614     aij->nonew = nonew;
2615   }
2616   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2617   PetscFunctionReturn(0);
2618 }
2619 
2620 /* -------------------------------------------------------------------*/
2621 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2622                                        MatGetRow_MPIAIJ,
2623                                        MatRestoreRow_MPIAIJ,
2624                                        MatMult_MPIAIJ,
2625                                 /* 4*/ MatMultAdd_MPIAIJ,
2626                                        MatMultTranspose_MPIAIJ,
2627                                        MatMultTransposeAdd_MPIAIJ,
2628 #if defined(PETSC_HAVE_PBGL)
2629                                        MatSolve_MPIAIJ,
2630 #else
2631                                        0,
2632 #endif
2633                                        0,
2634                                        0,
2635                                 /*10*/ 0,
2636                                        0,
2637                                        0,
2638                                        MatSOR_MPIAIJ,
2639                                        MatTranspose_MPIAIJ,
2640                                 /*15*/ MatGetInfo_MPIAIJ,
2641                                        MatEqual_MPIAIJ,
2642                                        MatGetDiagonal_MPIAIJ,
2643                                        MatDiagonalScale_MPIAIJ,
2644                                        MatNorm_MPIAIJ,
2645                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2646                                        MatAssemblyEnd_MPIAIJ,
2647                                        MatSetOption_MPIAIJ,
2648                                        MatZeroEntries_MPIAIJ,
2649                                 /*24*/ MatZeroRows_MPIAIJ,
2650                                        0,
2651 #if defined(PETSC_HAVE_PBGL)
2652                                        0,
2653 #else
2654                                        0,
2655 #endif
2656                                        0,
2657                                        0,
2658                                 /*29*/ MatSetUp_MPIAIJ,
2659 #if defined(PETSC_HAVE_PBGL)
2660                                        0,
2661 #else
2662                                        0,
2663 #endif
2664                                        0,
2665                                        0,
2666                                        0,
2667                                 /*34*/ MatDuplicate_MPIAIJ,
2668                                        0,
2669                                        0,
2670                                        0,
2671                                        0,
2672                                 /*39*/ MatAXPY_MPIAIJ,
2673                                        MatGetSubMatrices_MPIAIJ,
2674                                        MatIncreaseOverlap_MPIAIJ,
2675                                        MatGetValues_MPIAIJ,
2676                                        MatCopy_MPIAIJ,
2677                                 /*44*/ MatGetRowMax_MPIAIJ,
2678                                        MatScale_MPIAIJ,
2679                                        MatShift_MPIAIJ,
2680                                        MatDiagonalSet_MPIAIJ,
2681                                        MatZeroRowsColumns_MPIAIJ,
2682                                 /*49*/ MatSetRandom_MPIAIJ,
2683                                        0,
2684                                        0,
2685                                        0,
2686                                        0,
2687                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2688                                        0,
2689                                        MatSetUnfactored_MPIAIJ,
2690                                        MatPermute_MPIAIJ,
2691                                        0,
2692                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2693                                        MatDestroy_MPIAIJ,
2694                                        MatView_MPIAIJ,
2695                                        0,
2696                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2697                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2698                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2699                                        0,
2700                                        0,
2701                                        0,
2702                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2703                                        MatGetRowMinAbs_MPIAIJ,
2704                                        0,
2705                                        MatSetColoring_MPIAIJ,
2706                                        0,
2707                                        MatSetValuesAdifor_MPIAIJ,
2708                                 /*75*/ MatFDColoringApply_AIJ,
2709                                        0,
2710                                        0,
2711                                        0,
2712                                        MatFindZeroDiagonals_MPIAIJ,
2713                                 /*80*/ 0,
2714                                        0,
2715                                        0,
2716                                 /*83*/ MatLoad_MPIAIJ,
2717                                        0,
2718                                        0,
2719                                        0,
2720                                        0,
2721                                        0,
2722                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2723                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2724                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2725                                        MatPtAP_MPIAIJ_MPIAIJ,
2726                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2727                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2728                                        0,
2729                                        0,
2730                                        0,
2731                                        0,
2732                                 /*99*/ 0,
2733                                        0,
2734                                        0,
2735                                        MatConjugate_MPIAIJ,
2736                                        0,
2737                                 /*104*/MatSetValuesRow_MPIAIJ,
2738                                        MatRealPart_MPIAIJ,
2739                                        MatImaginaryPart_MPIAIJ,
2740                                        0,
2741                                        0,
2742                                 /*109*/0,
2743                                        0,
2744                                        MatGetRowMin_MPIAIJ,
2745                                        0,
2746                                        0,
2747                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2748                                        0,
2749                                        MatGetGhosts_MPIAIJ,
2750                                        0,
2751                                        0,
2752                                 /*119*/0,
2753                                        0,
2754                                        0,
2755                                        0,
2756                                        MatGetMultiProcBlock_MPIAIJ,
2757                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2758                                        MatGetColumnNorms_MPIAIJ,
2759                                        MatInvertBlockDiagonal_MPIAIJ,
2760                                        0,
2761                                        MatGetSubMatricesMPI_MPIAIJ,
2762                                 /*129*/0,
2763                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2764                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2765                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2766                                        0,
2767                                 /*134*/0,
2768                                        0,
2769                                        0,
2770                                        0,
2771                                        0,
2772                                 /*139*/0,
2773                                        0,
2774                                        0,
2775                                        MatFDColoringSetUp_MPIXAIJ,
2776                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2777                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2778 };
2779 
2780 /* ----------------------------------------------------------------------------------------*/
2781 
2782 #undef __FUNCT__
2783 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2784 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2785 {
2786   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2787   PetscErrorCode ierr;
2788 
2789   PetscFunctionBegin;
2790   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2791   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2792   PetscFunctionReturn(0);
2793 }
2794 
2795 #undef __FUNCT__
2796 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2797 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2798 {
2799   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2800   PetscErrorCode ierr;
2801 
2802   PetscFunctionBegin;
2803   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2804   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2805   PetscFunctionReturn(0);
2806 }
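
/*
   Usage sketch (illustrative only): the store/retrieve pair is meant for codes that
   reuse a fixed nonzero structure while the numerical values change.  The nonzero
   pattern must be frozen before the values are stored.

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     // ... overwrite values of existing nonzeros, assemble, use the matrix ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);          // restore the saved values
*/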
2807 
2808 #undef __FUNCT__
2809 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2810 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2811 {
2812   Mat_MPIAIJ     *b;
2813   PetscErrorCode ierr;
2814 
2815   PetscFunctionBegin;
2816   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2817   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2818   b = (Mat_MPIAIJ*)B->data;
2819 
2820   if (!B->preallocated) {
2821     /* Explicitly create 2 MATSEQAIJ matrices. */
2822     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2823     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2824     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2825     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2826     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2827     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2828     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2829     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2830     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2831     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2832   }
2833 
2834   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2835   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2836   B->preallocated = PETSC_TRUE;
2837   PetscFunctionReturn(0);
2838 }
2839 
2840 #undef __FUNCT__
2841 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2842 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2843 {
2844   Mat            mat;
2845   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2846   PetscErrorCode ierr;
2847 
2848   PetscFunctionBegin;
2849   *newmat = 0;
2850   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2851   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2852   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2853   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2854   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2855   a       = (Mat_MPIAIJ*)mat->data;
2856 
2857   mat->factortype   = matin->factortype;
2858   mat->assembled    = PETSC_TRUE;
2859   mat->insertmode   = NOT_SET_VALUES;
2860   mat->preallocated = PETSC_TRUE;
2861 
2862   a->size         = oldmat->size;
2863   a->rank         = oldmat->rank;
2864   a->donotstash   = oldmat->donotstash;
2865   a->roworiented  = oldmat->roworiented;
2866   a->rowindices   = 0;
2867   a->rowvalues    = 0;
2868   a->getrowactive = PETSC_FALSE;
2869 
2870   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2871   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2872 
2873   if (oldmat->colmap) {
2874 #if defined(PETSC_USE_CTABLE)
2875     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2876 #else
2877     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2878     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2879     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2880 #endif
2881   } else a->colmap = 0;
2882   if (oldmat->garray) {
2883     PetscInt len;
2884     len  = oldmat->B->cmap->n;
2885     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2886     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2887     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2888   } else a->garray = 0;
2889 
2890   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2891   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2892   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2893   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2894   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2895   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2896   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2897   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2898   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2899   *newmat = mat;
2900   PetscFunctionReturn(0);
2901 }
2902 
2903 
2904 
2905 #undef __FUNCT__
2906 #define __FUNCT__ "MatLoad_MPIAIJ"
2907 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2908 {
2909   PetscScalar    *vals,*svals;
2910   MPI_Comm       comm;
2911   PetscErrorCode ierr;
2912   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2913   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2914   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2915   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2916   PetscInt       cend,cstart,n,*rowners;
2917   int            fd;
2918   PetscInt       bs = newMat->rmap->bs;
2919 
2920   PetscFunctionBegin;
2921   /* force binary viewer to load .info file if it has not yet done so */
2922   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2923   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2924   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2925   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2926   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2927   if (!rank) {
2928     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2929     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2930   }
2931 
2932   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
2933   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2934   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2935   if (bs < 0) bs = 1;
2936 
2937   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2938   M    = header[1]; N = header[2];
2939 
2940   /* If global sizes are set, check if they are consistent with that given in the file */
2941   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2942   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2943 
2944   /* determine ownership of all (block) rows */
2945   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2946   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2947   else m = newMat->rmap->n; /* Set by user */
2948 
2949   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2950   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2951 
2952   /* First process needs enough room for process with most rows */
2953   if (!rank) {
2954     mmax = rowners[1];
2955     for (i=2; i<=size; i++) {
2956       mmax = PetscMax(mmax, rowners[i]);
2957     }
2958   } else mmax = -1;             /* unused, but compilers complain */
2959 
2960   rowners[0] = 0;
2961   for (i=2; i<=size; i++) {
2962     rowners[i] += rowners[i-1];
2963   }
2964   rstart = rowners[rank];
2965   rend   = rowners[rank+1];
2966 
2967   /* distribute row lengths to all processors */
2968   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2969   if (!rank) {
2970     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2971     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2972     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2973     for (j=0; j<m; j++) {
2974       procsnz[0] += ourlens[j];
2975     }
2976     for (i=1; i<size; i++) {
2977       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2978       /* calculate the number of nonzeros on each processor */
2979       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2980         procsnz[i] += rowlengths[j];
2981       }
2982       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2983     }
2984     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2985   } else {
2986     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2987   }
2988 
2989   if (!rank) {
2990     /* determine max buffer needed and allocate it */
2991     maxnz = 0;
2992     for (i=0; i<size; i++) {
2993       maxnz = PetscMax(maxnz,procsnz[i]);
2994     }
2995     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2996 
2997     /* read in my part of the matrix column indices  */
2998     nz   = procsnz[0];
2999     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3000     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3001 
3002     /* read in everyone else's rows and ship them off */
3003     for (i=1; i<size; i++) {
3004       nz   = procsnz[i];
3005       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3006       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3007     }
3008     ierr = PetscFree(cols);CHKERRQ(ierr);
3009   } else {
3010     /* determine buffer space needed for message */
3011     nz = 0;
3012     for (i=0; i<m; i++) {
3013       nz += ourlens[i];
3014     }
3015     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3016 
3017     /* receive message of column indices*/
3018     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3019   }
3020 
3021   /* determine column ownership if matrix is not square */
3022   if (N != M) {
3023     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3024     else n = newMat->cmap->n;
3025     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3026     cstart = cend - n;
3027   } else {
3028     cstart = rstart;
3029     cend   = rend;
3030     n      = cend - cstart;
3031   }
3032 
3033   /* loop over local rows, determining number of off diagonal entries */
3034   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3035   jj   = 0;
3036   for (i=0; i<m; i++) {
3037     for (j=0; j<ourlens[i]; j++) {
3038       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3039       jj++;
3040     }
3041   }
3042 
3043   for (i=0; i<m; i++) {
3044     ourlens[i] -= offlens[i];
3045   }
3046   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3047 
3048   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3049 
3050   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3051 
3052   for (i=0; i<m; i++) {
3053     ourlens[i] += offlens[i];
3054   }
3055 
3056   if (!rank) {
3057     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3058 
3059     /* read in my part of the matrix numerical values  */
3060     nz   = procsnz[0];
3061     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3062 
3063     /* insert into matrix */
3064     jj      = rstart;
3065     smycols = mycols;
3066     svals   = vals;
3067     for (i=0; i<m; i++) {
3068       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3069       smycols += ourlens[i];
3070       svals   += ourlens[i];
3071       jj++;
3072     }
3073 
3074     /* read in other processors and ship out */
3075     for (i=1; i<size; i++) {
3076       nz   = procsnz[i];
3077       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3078       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3079     }
3080     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3081   } else {
3082     /* receive numeric values */
3083     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3084 
3085     /* receive message of values*/
3086     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3087 
3088     /* insert into matrix */
3089     jj      = rstart;
3090     smycols = mycols;
3091     svals   = vals;
3092     for (i=0; i<m; i++) {
3093       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3094       smycols += ourlens[i];
3095       svals   += ourlens[i];
3096       jj++;
3097     }
3098   }
3099   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3100   ierr = PetscFree(vals);CHKERRQ(ierr);
3101   ierr = PetscFree(mycols);CHKERRQ(ierr);
3102   ierr = PetscFree(rowners);CHKERRQ(ierr);
3103   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3104   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3105   PetscFunctionReturn(0);
3106 }
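
/*
   Usage sketch (illustrative only, assuming "matrix.dat" was previously written with
   MatView() to a binary viewer):

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/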
3107 
3108 #undef __FUNCT__
3109 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3110 /* TODO: Not scalable because of ISAllGather(). */
3111 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3112 {
3113   PetscErrorCode ierr;
3114   IS             iscol_local;
3115   PetscInt       csize;
3116 
3117   PetscFunctionBegin;
3118   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3119   if (call == MAT_REUSE_MATRIX) {
3120     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3121     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3122   } else {
3123     PetscInt cbs;
3124     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3125     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3126     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3127   }
3128   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3129   if (call == MAT_INITIAL_MATRIX) {
3130     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3131     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3132   }
3133   PetscFunctionReturn(0);
3134 }
3135 
3136 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3137 #undef __FUNCT__
3138 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3139 /*
3140     Not great since it makes two copies of the submatrix: first each process builds a
3141   local SeqAIJ copy, and then the final result is obtained by concatenating those local matrices.
3142   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3143 
3144   Note: This requires a sequential iscol with all indices.
3145 */
3146 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3147 {
3148   PetscErrorCode ierr;
3149   PetscMPIInt    rank,size;
3150   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3151   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3152   PetscBool      allcolumns, colflag;
3153   Mat            M,Mreuse;
3154   MatScalar      *vwork,*aa;
3155   MPI_Comm       comm;
3156   Mat_SeqAIJ     *aij;
3157 
3158   PetscFunctionBegin;
3159   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3160   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3161   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3162 
3163   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3164   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3165   if (colflag && ncol == mat->cmap->N) {
3166     allcolumns = PETSC_TRUE;
3167   } else {
3168     allcolumns = PETSC_FALSE;
3169   }
3170   if (call ==  MAT_REUSE_MATRIX) {
3171     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3172     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3173     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3174   } else {
3175     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3176   }
3177 
3178   /*
3179       m - number of local rows
3180       n - number of columns (same on all processors)
3181       rstart - first row in new global matrix generated
3182   */
3183   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3184   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3185   if (call == MAT_INITIAL_MATRIX) {
3186     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3187     ii  = aij->i;
3188     jj  = aij->j;
3189 
3190     /*
3191         Determine the number of non-zeros in the diagonal and off-diagonal
3192         portions of the matrix in order to do correct preallocation
3193     */
3194 
3195     /* first get start and end of "diagonal" columns */
3196     if (csize == PETSC_DECIDE) {
3197       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3198       if (mglobal == n) { /* square matrix */
3199         nlocal = m;
3200       } else {
3201         nlocal = n/size + ((n % size) > rank);
3202       }
3203     } else {
3204       nlocal = csize;
3205     }
3206     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3207     rstart = rend - nlocal;
3208     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3209 
3210     /* next, compute all the lengths */
3211     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3212     olens = dlens + m;
3213     for (i=0; i<m; i++) {
3214       jend = ii[i+1] - ii[i];
3215       olen = 0;
3216       dlen = 0;
3217       for (j=0; j<jend; j++) {
3218         if (*jj < rstart || *jj >= rend) olen++;
3219         else dlen++;
3220         jj++;
3221       }
3222       olens[i] = olen;
3223       dlens[i] = dlen;
3224     }
3225     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3226     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3227     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3228     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3229     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3230     ierr = PetscFree(dlens);CHKERRQ(ierr);
3231   } else {
3232     PetscInt ml,nl;
3233 
3234     M    = *newmat;
3235     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3236     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3237     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3238     /*
3239          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3240        rather than the slower MatSetValues().
3241     */
3242     M->was_assembled = PETSC_TRUE;
3243     M->assembled     = PETSC_FALSE;
3244   }
3245   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3246   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3247   ii   = aij->i;
3248   jj   = aij->j;
3249   aa   = aij->a;
3250   for (i=0; i<m; i++) {
3251     row   = rstart + i;
3252     nz    = ii[i+1] - ii[i];
3253     cwork = jj;     jj += nz;
3254     vwork = aa;     aa += nz;
3255     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3256   }
3257 
3258   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3259   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3260   *newmat = M;
3261 
3262   /* save submatrix used in processor for next request */
3263   if (call ==  MAT_INITIAL_MATRIX) {
3264     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3265     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3266   }
3267   PetscFunctionReturn(0);
3268 }
3269 
3270 #undef __FUNCT__
3271 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3272 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3273 {
3274   PetscInt       m,cstart, cend,j,nnz,i,d;
3275   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3276   const PetscInt *JJ;
3277   PetscScalar    *values;
3278   PetscErrorCode ierr;
3279 
3280   PetscFunctionBegin;
3281   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3282 
3283   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3284   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3285   m      = B->rmap->n;
3286   cstart = B->cmap->rstart;
3287   cend   = B->cmap->rend;
3288   rstart = B->rmap->rstart;
3289 
3290   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3291 
3292 #if defined(PETSC_USE_DEBUG)
3293   for (i=0; i<m; i++) {
3294     nnz = Ii[i+1]- Ii[i];
3295     JJ  = J + Ii[i];
3296     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3297     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3298     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3299   }
3300 #endif
3301 
3302   for (i=0; i<m; i++) {
3303     nnz     = Ii[i+1]- Ii[i];
3304     JJ      = J + Ii[i];
3305     nnz_max = PetscMax(nnz_max,nnz);
3306     d       = 0;
3307     for (j=0; j<nnz; j++) {
3308       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3309     }
3310     d_nnz[i] = d;
3311     o_nnz[i] = nnz - d;
3312   }
3313   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3314   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3315 
3316   if (v) values = (PetscScalar*)v;
3317   else {
3318     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3319   }
3320 
3321   for (i=0; i<m; i++) {
3322     ii   = i + rstart;
3323     nnz  = Ii[i+1]- Ii[i];
3324     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3325   }
3326   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3327   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3328 
3329   if (!v) {
3330     ierr = PetscFree(values);CHKERRQ(ierr);
3331   }
3332   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3333   PetscFunctionReturn(0);
3334 }
3335 
3336 #undef __FUNCT__
3337 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3338 /*@
3339    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3340    (the default parallel PETSc format).
3341 
3342    Collective on MPI_Comm
3343 
3344    Input Parameters:
3345 +  B - the matrix
3346 .  i - the indices into j for the start of each local row (starts with zero)
3347 .  j - the column indices for each local row (starts with zero)
3348 -  v - optional values in the matrix
3349 
3350    Level: developer
3351 
3352    Notes:
3353        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3354      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3355      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3356 
3357        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3358 
3359        The format used for the sparse matrix input is equivalent to a
3360     row-major ordering, i.e. for the following matrix, the input data expected is
3361     as shown:
3362 
3363         1 0 0
3364         2 0 3     P0
3365        -------
3366         4 5 6     P1
3367 
3368      Process0 [P0]: rows_owned=[0,1]
3369         i =  {0,1,3}  [size = nrow+1  = 2+1]
3370         j =  {0,0,2}  [size = nz = 3]
3371         v =  {1,2,3}  [size = nz = 3]
3372 
3373      Process1 [P1]: rows_owned=[2]
3374         i =  {0,3}    [size = nrow+1  = 1+1]
3375         j =  {0,1,2}  [size = nz = 3]
3376         v =  {4,5,6}  [size = nz = 3]
3377 
3378 .keywords: matrix, aij, compressed row, sparse, parallel
3379 
3380 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3381           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3382 @*/
3383 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3384 {
3385   PetscErrorCode ierr;
3386 
3387   PetscFunctionBegin;
3388   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3389   PetscFunctionReturn(0);
3390 }
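
/*
   Usage sketch (illustrative only), following the two-process 3x3 example in the
   manual page above; each process passes only the CSR data of the rows it owns.

     Mat         B;
     PetscMPIInt rank;
     PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};   // P0 owns rows 0 and 1
     PetscScalar v0[] = {1,2,3};
     PetscInt    i1[] = {0,3},   j1[] = {0,1,2};   // P1 owns row 2
     PetscScalar v1[] = {4,5,6};

     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,!rank ? 2 : 1,PETSC_DECIDE,3,3);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     if (!rank) {ierr = MatMPIAIJSetPreallocationCSR(B,i0,j0,v0);CHKERRQ(ierr);}
     else       {ierr = MatMPIAIJSetPreallocationCSR(B,i1,j1,v1);CHKERRQ(ierr);}
*/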
3391 
3392 #undef __FUNCT__
3393 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3394 /*@C
3395    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3396    (the default parallel PETSc format).  For good matrix assembly performance
3397    the user should preallocate the matrix storage by setting the parameters
3398    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3399    performance can be increased by more than a factor of 50.
3400 
3401    Collective on MPI_Comm
3402 
3403    Input Parameters:
3404 +  B - the matrix
3405 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3406            (same value is used for all local rows)
3407 .  d_nnz - array containing the number of nonzeros in the various rows of the
3408            DIAGONAL portion of the local submatrix (possibly different for each row)
3409            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3410            The size of this array is equal to the number of local rows, i.e 'm'.
3411            For matrices that will be factored, you must leave room for (and set)
3412            the diagonal entry even if it is zero.
3413 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3414            submatrix (same value is used for all local rows).
3415 -  o_nnz - array containing the number of nonzeros in the various rows of the
3416            OFF-DIAGONAL portion of the local submatrix (possibly different for
3417            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3418            structure. The size of this array is equal to the number
3419            of local rows, i.e 'm'.
3420 
3421    If the *_nnz parameter is given then the *_nz parameter is ignored
3422 
3423    The AIJ format (also called the Yale sparse matrix format or
3424    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3425    storage.  The stored row and column indices begin with zero.
3426    See Users-Manual: ch_mat for details.
3427 
3428    The parallel matrix is partitioned such that the first m0 rows belong to
3429    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3430    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3431 
3432    The DIAGONAL portion of the local submatrix of a processor can be defined
3433    as the submatrix which is obtained by extraction the part corresponding to
3434    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3435    first row that belongs to the processor, r2 is the last row belonging to
3436    this processor, and c1-c2 is the range of indices of the local part of a
3437    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3438    common case of a square matrix, the row and column ranges are the same and
3439    the DIAGONAL part is also square. The remaining portion of the local
3440    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3441 
3442    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3443 
3444    You can call MatGetInfo() to get information on how effective the preallocation was;
3445    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3446    You can also run with the option -info and look for messages with the string
3447    malloc in them to see if additional memory allocation was needed.
3448 
3449    Example usage:
3450 
3451    Consider the following 8x8 matrix with 34 non-zero values, that is
3452    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3453    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3454    as follows:
3455 
3456 .vb
3457             1  2  0  |  0  3  0  |  0  4
3458     Proc0   0  5  6  |  7  0  0  |  8  0
3459             9  0 10  | 11  0  0  | 12  0
3460     -------------------------------------
3461            13  0 14  | 15 16 17  |  0  0
3462     Proc1   0 18  0  | 19 20 21  |  0  0
3463             0  0  0  | 22 23  0  | 24  0
3464     -------------------------------------
3465     Proc2  25 26 27  |  0  0 28  | 29  0
3466            30  0  0  | 31 32 33  |  0 34
3467 .ve
3468 
3469    This can be represented as a collection of submatrices as:
3470 
3471 .vb
3472       A B C
3473       D E F
3474       G H I
3475 .ve
3476 
3477    Where the submatrices A,B,C are owned by proc0, D,E,F are
3478    owned by proc1, G,H,I are owned by proc2.
3479 
3480    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3481    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3482    The 'M','N' parameters are 8,8, and have the same values on all procs.
3483 
3484    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3485    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3486    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3487    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3488    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3489    matrix, and [DF] as another SeqAIJ matrix.
3490 
3491    When d_nz, o_nz parameters are specified, d_nz storage elements are
3492    allocated for every row of the local diagonal submatrix, and o_nz
3493    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3494    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3495    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3496    In this case, the values of d_nz,o_nz are:
3497 .vb
3498      proc0 : dnz = 2, o_nz = 2
3499      proc1 : dnz = 3, o_nz = 2
3500      proc2 : dnz = 1, o_nz = 4
3501 .ve
3502    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3503    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3504    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3505    34 values.
3506 
3507    When d_nnz, o_nnz parameters are specified, the storage is specified
3508    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3509    In the above case the values for d_nnz,o_nnz are:
3510 .vb
3511      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3512      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3513      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3514 .ve
3515    Here the space allocated is the sum of all the above values, i.e. 34, and
3516    hence the preallocation is exact.
3517 
3518    Level: intermediate
3519 
3520 .keywords: matrix, aij, compressed row, sparse, parallel
3521 
3522 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3523           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3524 @*/
3525 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3526 {
3527   PetscErrorCode ierr;
3528 
3529   PetscFunctionBegin;
3530   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3531   PetscValidType(B,1);
3532   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3533   PetscFunctionReturn(0);
3534 }
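
/*
   Usage sketch (illustrative only): the d_nnz/o_nnz preallocation that proc0 of the
   8x8 example above would supply (3 local rows; the other processes supply their own
   arrays analogously).

     Mat      A;
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);           // local row/column sizes differ per process
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     // ... MatSetValues() and MatAssemblyBegin()/MatAssemblyEnd() as usual ...
*/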
3535 
3536 #undef __FUNCT__
3537 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3538 /*@
3539      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3540          CSR format the local rows.
3541 
3542    Collective on MPI_Comm
3543 
3544    Input Parameters:
3545 +  comm - MPI communicator
3546 .  m - number of local rows (Cannot be PETSC_DECIDE)
3547 .  n - This value should be the same as the local size used in creating the
3548        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3549        calculated if N is given) For square matrices n is almost always m.
3550 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3551 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3552 .   i - row indices
3553 .   j - column indices
3554 -   a - matrix values
3555 
3556    Output Parameter:
3557 .   mat - the matrix
3558 
3559    Level: intermediate
3560 
3561    Notes:
3562        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3563      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3564      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3565 
3566        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3567 
3568        The format used for the sparse matrix input is equivalent to a
3569     row-major ordering, i.e. for the following matrix, the input data expected is
3570     as shown:
3571 
3572         1 0 0
3573         2 0 3     P0
3574        -------
3575         4 5 6     P1
3576 
3577      Process0 [P0]: rows_owned=[0,1]
3578         i =  {0,1,3}  [size = nrow+1  = 2+1]
3579         j =  {0,0,2}  [size = nz = 3]
3580         v =  {1,2,3}  [size = nz = 3]
3581 
3582      Process1 [P1]: rows_owned=[2]
3583         i =  {0,3}    [size = nrow+1  = 1+1]
3584         j =  {0,1,2}  [size = nz = 3]
3585         v =  {4,5,6}  [size = nz = 3]
3586 
3587 .keywords: matrix, aij, compressed row, sparse, parallel
3588 
3589 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3590           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3591 @*/
3592 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3593 {
3594   PetscErrorCode ierr;
3595 
3596   PetscFunctionBegin;
3597   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3598   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3599   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3600   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3601   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3602   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3603   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3604   PetscFunctionReturn(0);
3605 }
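
/*
   Usage sketch (illustrative only), again for the two-process 3x3 example in the
   manual page above; the routine copies the supplied CSR arrays into a new matrix.

     Mat         C;
     PetscMPIInt rank;
     PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};
     PetscScalar a0[] = {1,2,3};
     PetscInt    i1[] = {0,3},   j1[] = {0,1,2};
     PetscScalar a1[] = {4,5,6};

     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     if (!rank) {ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i0,j0,a0,&C);CHKERRQ(ierr);}
     else       {ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,1,PETSC_DECIDE,3,3,i1,j1,a1,&C);CHKERRQ(ierr);}
*/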
3606 
3607 #undef __FUNCT__
3608 #define __FUNCT__ "MatCreateAIJ"
3609 /*@C
3610    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3611    (the default parallel PETSc format).  For good matrix assembly performance
3612    the user should preallocate the matrix storage by setting the parameters
3613    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3614    performance can be increased by more than a factor of 50.
3615 
3616    Collective on MPI_Comm
3617 
3618    Input Parameters:
3619 +  comm - MPI communicator
3620 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3621            This value should be the same as the local size used in creating the
3622            y vector for the matrix-vector product y = Ax.
3623 .  n - This value should be the same as the local size used in creating the
3624        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3625        calculated if N is given) For square matrices n is almost always m.
3626 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3627 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3628 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3629            (same value is used for all local rows)
3630 .  d_nnz - array containing the number of nonzeros in the various rows of the
3631            DIAGONAL portion of the local submatrix (possibly different for each row)
3632            or NULL, if d_nz is used to specify the nonzero structure.
3633            The size of this array is equal to the number of local rows, i.e 'm'.
3634 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3635            submatrix (same value is used for all local rows).
3636 -  o_nnz - array containing the number of nonzeros in the various rows of the
3637            OFF-DIAGONAL portion of the local submatrix (possibly different for
3638            each row) or NULL, if o_nz is used to specify the nonzero
3639            structure. The size of this array is equal to the number
3640            of local rows, i.e 'm'.
3641 
3642    Output Parameter:
3643 .  A - the matrix
3644 
3645    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3646    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3647    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3648 
3649    Notes:
3650    If the *_nnz parameter is given then the *_nz parameter is ignored
3651 
3652    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3653    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3654    storage requirements for this matrix.
3655 
3656    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3657    processor then it must be used on all processors that share the object for
3658    that argument.
3659 
3660    The user MUST specify either the local or global matrix dimensions
3661    (possibly both).
3662 
3663    The parallel matrix is partitioned across processors such that the
3664    first m0 rows belong to process 0, the next m1 rows belong to
3665    process 1, the next m2 rows belong to process 2 etc.. where
3666    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3667    values corresponding to [m x N] submatrix.
3668 
3669    The columns are logically partitioned with the n0 columns belonging
3670    to 0th partition, the next n1 columns belonging to the next
3671    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3672 
3673    The DIAGONAL portion of the local submatrix on any given processor
3674    is the submatrix corresponding to the rows and columns m,n
3675    corresponding to the given processor. i.e diagonal matrix on
3676    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3677    etc. The remaining portion of the local submatrix [m x (N-n)]
3678    constitute the OFF-DIAGONAL portion. The example below better
3679    illustrates this concept.
3680 
3681    For a square global matrix we define each processor's diagonal portion
3682    to be its local rows and the corresponding columns (a square submatrix);
3683    each processor's off-diagonal portion encompasses the remainder of the
3684    local matrix (a rectangular submatrix).
3685 
3686    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3687 
3688    When calling this routine with a single process communicator, a matrix of
3689    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3690    type of communicator, use the construction mechanism:
3691      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3692 
3693    By default, this format uses inodes (identical nodes) when possible.
3694    We search for consecutive rows with the same nonzero structure, thereby
3695    reusing matrix information to achieve increased efficiency.
3696 
3697    Options Database Keys:
3698 +  -mat_no_inode  - Do not use inodes
3699 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3700 -  -mat_aij_oneindex - Internally use indexing starting at 1
3701         rather than 0.  Note that when calling MatSetValues(),
3702         the user still MUST index entries starting at 0!
3703 
3704 
3705    Example usage:
3706 
3707    Consider the following 8x8 matrix with 34 non-zero values, that is
3708    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3709    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3710    as follows:
3711 
3712 .vb
3713             1  2  0  |  0  3  0  |  0  4
3714     Proc0   0  5  6  |  7  0  0  |  8  0
3715             9  0 10  | 11  0  0  | 12  0
3716     -------------------------------------
3717            13  0 14  | 15 16 17  |  0  0
3718     Proc1   0 18  0  | 19 20 21  |  0  0
3719             0  0  0  | 22 23  0  | 24  0
3720     -------------------------------------
3721     Proc2  25 26 27  |  0  0 28  | 29  0
3722            30  0  0  | 31 32 33  |  0 34
3723 .ve
3724 
3725    This can be represented as a collection of submatrices as:
3726 
3727 .vb
3728       A B C
3729       D E F
3730       G H I
3731 .ve
3732 
3733    Where the submatrices A,B,C are owned by proc0, D,E,F are
3734    owned by proc1, G,H,I are owned by proc2.
3735 
3736    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3737    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3738    The 'M','N' parameters are 8,8, and have the same values on all procs.
3739 
3740    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3741    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3742    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3743    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3744    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3745    matrix, and [DF] as another SeqAIJ matrix.
3746 
3747    When d_nz, o_nz parameters are specified, d_nz storage elements are
3748    allocated for every row of the local diagonal submatrix, and o_nz
3749    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3750    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3751    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3752    In this case, the values of d_nz,o_nz are:
3753 .vb
3754      proc0 : dnz = 2, o_nz = 2
3755      proc1 : dnz = 3, o_nz = 2
3756      proc2 : dnz = 1, o_nz = 4
3757 .ve
3758    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3759    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3760    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3761    34 values.
3762 
3763    When d_nnz, o_nnz parameters are specified, the storage is specified
3764    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3765    In the above case the values for d_nnz,o_nnz are:
3766 .vb
3767      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3768      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3769      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3770 .ve
3771    Here the space allocated is the sum of all the above values, i.e. 34, and
3772    hence the preallocation is exact.
3773 
3774    Level: intermediate
3775 
3776 .keywords: matrix, aij, compressed row, sparse, parallel
3777 
3778 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3779           MPIAIJ, MatCreateMPIAIJWithArrays()
3780 @*/
3781 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3782 {
3783   PetscErrorCode ierr;
3784   PetscMPIInt    size;
3785 
3786   PetscFunctionBegin;
3787   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3788   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3789   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3790   if (size > 1) {
3791     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3792     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3793   } else {
3794     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3795     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3796   }
3797   PetscFunctionReturn(0);
3798 }
3799 
3800 #undef __FUNCT__
3801 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
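/*
  MatMPIAIJGetSeqAIJ - returns the local diagonal block (Ad), the local off-diagonal block (Ao),
  and the map from local columns of Ao to global column numbers (colmap, the garray) of an
  MPIAIJ matrix.  The returned objects are references owned by A and must not be destroyed
  by the caller.
*/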
3802 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3803 {
3804   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3805 
3806   PetscFunctionBegin;
3807   if (Ad)     *Ad     = a->A;
3808   if (Ao)     *Ao     = a->B;
3809   if (colmap) *colmap = a->garray;
3810   PetscFunctionReturn(0);
3811 }
3812 
3813 #undef __FUNCT__
3814 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3815 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3816 {
3817   PetscErrorCode ierr;
3818   PetscInt       i;
3819   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3820 
3821   PetscFunctionBegin;
3822   if (coloring->ctype == IS_COLORING_GLOBAL) {
3823     ISColoringValue *allcolors,*colors;
3824     ISColoring      ocoloring;
3825 
3826     /* set coloring for diagonal portion */
3827     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3828 
3829     /* set coloring for off-diagonal portion */
3830     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
3831     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3832     for (i=0; i<a->B->cmap->n; i++) {
3833       colors[i] = allcolors[a->garray[i]];
3834     }
3835     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3836     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3837     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3838     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3839   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3840     ISColoringValue *colors;
3841     PetscInt        *larray;
3842     ISColoring      ocoloring;
3843 
3844     /* set coloring for diagonal portion */
3845     ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr);
3846     for (i=0; i<a->A->cmap->n; i++) {
3847       larray[i] = i + A->cmap->rstart;
3848     }
3849     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
3850     ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr);
3851     for (i=0; i<a->A->cmap->n; i++) {
3852       colors[i] = coloring->colors[larray[i]];
3853     }
3854     ierr = PetscFree(larray);CHKERRQ(ierr);
3855     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3856     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3857     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3858 
3859     /* set coloring for off-diagonal portion */
3860     ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr);
3861     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
3862     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3863     for (i=0; i<a->B->cmap->n; i++) {
3864       colors[i] = coloring->colors[larray[i]];
3865     }
3866     ierr = PetscFree(larray);CHKERRQ(ierr);
3867     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3868     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3869     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3870   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3871   PetscFunctionReturn(0);
3872 }
3873 
3874 #undef __FUNCT__
3875 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3876 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3877 {
3878   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3879   PetscErrorCode ierr;
3880 
3881   PetscFunctionBegin;
3882   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3883   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3884   PetscFunctionReturn(0);
3885 }
3886 
3887 #undef __FUNCT__
3888 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
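/*
  MatCreateMPIMatConcatenateSeqMat_MPIAIJ - stacks the rows of the per-process sequential matrix
  inmat into a parallel MPIAIJ matrix outmat on comm.  With MAT_INITIAL_MATRIX a symbolic pass
  determines the local column size and the preallocation; the numeric pass then inserts each
  local row at its global row offset and assembles the result.
*/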
3889 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3890 {
3891   PetscErrorCode ierr;
3892   PetscInt       m,N,i,rstart,nnz,Ii;
3893   PetscInt       *indx;
3894   PetscScalar    *values;
3895 
3896   PetscFunctionBegin;
3897   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3898   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3899     PetscInt       *dnz,*onz,sum,bs,cbs;
3900 
3901     if (n == PETSC_DECIDE) {
3902       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3903     }
3904     /* Check sum(n) = N */
3905     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3906     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
3907 
3908     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3909     rstart -= m;
3910 
3911     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3912     for (i=0; i<m; i++) {
3913       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3914       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3915       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3916     }
3917 
3918     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3919     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3920     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3921     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3922     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3923     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3924     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3925   }
3926 
3927   /* numeric phase */
3928   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3929   for (i=0; i<m; i++) {
3930     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3931     Ii   = i + rstart;
3932     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3933     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3934   }
3935   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3936   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3937   PetscFunctionReturn(0);
3938 }
3939 
3940 #undef __FUNCT__
3941 #define __FUNCT__ "MatFileSplit"
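/*
  MatFileSplit - writes the local rows of the parallel matrix A to a separate binary file on each
  process, named <outfile>.<rank>, by first copying them into a temporary sequential matrix.
*/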
3942 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3943 {
3944   PetscErrorCode    ierr;
3945   PetscMPIInt       rank;
3946   PetscInt          m,N,i,rstart,nnz;
3947   size_t            len;
3948   const PetscInt    *indx;
3949   PetscViewer       out;
3950   char              *name;
3951   Mat               B;
3952   const PetscScalar *values;
3953 
3954   PetscFunctionBegin;
3955   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3956   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3957   /* Should this be the type of the diagonal block of A? */
3958   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3959   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3960   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3961   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3962   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3963   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3964   for (i=0; i<m; i++) {
3965     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3966     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3967     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3968   }
3969   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3970   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3971 
3972   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3973   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3974   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
3975   sprintf(name,"%s.%d",outfile,rank);
3976   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3977   ierr = PetscFree(name);CHKERRQ(ierr);
3978   ierr = MatView(B,out);CHKERRQ(ierr);
3979   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3980   ierr = MatDestroy(&B);CHKERRQ(ierr);
3981   PetscFunctionReturn(0);
3982 }
3983 
3984 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3985 #undef __FUNCT__
3986 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
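/*
  MatDestroy_MPIAIJ_SeqsToMPI - frees the Mat_Merge_SeqsToMPI support structure attached by
  MatCreateMPIAIJSumSeqAIJSymbolic() (if present) and then performs the standard MPIAIJ destroy.
*/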
3987 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3988 {
3989   PetscErrorCode      ierr;
3990   Mat_Merge_SeqsToMPI *merge;
3991   PetscContainer      container;
3992 
3993   PetscFunctionBegin;
3994   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3995   if (container) {
3996     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3997     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3998     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3999     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4000     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4001     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4002     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4003     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4004     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4005     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4006     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4007     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4008     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4009     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4010     ierr = PetscFree(merge);CHKERRQ(ierr);
4011     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4012   }
4013   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4014   PetscFunctionReturn(0);
4015 }
4016 
4017 #include <../src/mat/utils/freespace.h>
4018 #include <petscbt.h>
4019 
4020 #undef __FUNCT__
4021 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
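/*
  MatCreateMPIAIJSumSeqAIJNumeric - numeric phase of MatCreateMPIAIJSumSeqAIJ(): using the
  communication pattern stored in the "MatMergeSeqsToMPI" container, each process sends the
  values of its off-process rows of seqmat, sums the local and received contributions for every
  owned row, and inserts them into mpimat.
*/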
4022 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4023 {
4024   PetscErrorCode      ierr;
4025   MPI_Comm            comm;
4026   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4027   PetscMPIInt         size,rank,taga,*len_s;
4028   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4029   PetscInt            proc,m;
4030   PetscInt            **buf_ri,**buf_rj;
4031   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4032   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4033   MPI_Request         *s_waits,*r_waits;
4034   MPI_Status          *status;
4035   MatScalar           *aa=a->a;
4036   MatScalar           **abuf_r,*ba_i;
4037   Mat_Merge_SeqsToMPI *merge;
4038   PetscContainer      container;
4039 
4040   PetscFunctionBegin;
4041   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4042   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4043 
4044   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4045   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4046 
4047   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4048   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4049 
4050   bi     = merge->bi;
4051   bj     = merge->bj;
4052   buf_ri = merge->buf_ri;
4053   buf_rj = merge->buf_rj;
4054 
4055   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4056   owners = merge->rowmap->range;
4057   len_s  = merge->len_s;
4058 
4059   /* send and recv matrix values */
4060   /*-----------------------------*/
4061   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4062   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4063 
4064   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4065   for (proc=0,k=0; proc<size; proc++) {
4066     if (!len_s[proc]) continue;
4067     i    = owners[proc];
4068     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4069     k++;
4070   }
4071 
4072   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4073   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4074   ierr = PetscFree(status);CHKERRQ(ierr);
4075 
4076   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4077   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4078 
4079   /* insert mat values of mpimat */
4080   /*----------------------------*/
4081   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4082   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4083 
4084   for (k=0; k<merge->nrecv; k++) {
4085     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4086     nrows       = *(buf_ri_k[k]);
4087     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4088     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4089   }
4090 
4091   /* set values of ba */
4092   m = merge->rowmap->n;
4093   for (i=0; i<m; i++) {
4094     arow = owners[rank] + i;
4095     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4096     bnzi = bi[i+1] - bi[i];
4097     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4098 
4099     /* add local non-zero vals of this proc's seqmat into ba */
4100     anzi   = ai[arow+1] - ai[arow];
4101     aj     = a->j + ai[arow];
4102     aa     = a->a + ai[arow];
4103     nextaj = 0;
4104     for (j=0; nextaj<anzi; j++) {
4105       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4106         ba_i[j] += aa[nextaj++];
4107       }
4108     }
4109 
4110     /* add received vals into ba */
4111     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4112       /* i-th row */
4113       if (i == *nextrow[k]) {
4114         anzi   = *(nextai[k]+1) - *nextai[k];
4115         aj     = buf_rj[k] + *(nextai[k]);
4116         aa     = abuf_r[k] + *(nextai[k]);
4117         nextaj = 0;
4118         for (j=0; nextaj<anzi; j++) {
4119           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4120             ba_i[j] += aa[nextaj++];
4121           }
4122         }
4123         nextrow[k]++; nextai[k]++;
4124       }
4125     }
4126     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4127   }
4128   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4129   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4130 
4131   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4132   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4133   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4134   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4135   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4136   PetscFunctionReturn(0);
4137 }
4138 
4139 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4140 
4141 #undef __FUNCT__
4142 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
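/*
  MatCreateMPIAIJSumSeqAIJSymbolic - symbolic phase of MatCreateMPIAIJSumSeqAIJ(): determines the
  row ownership, exchanges the i- and j-structure of the off-process rows of each seqmat, merges
  them into the local nonzero pattern, preallocates the parallel matrix, and attaches the
  Mat_Merge_SeqsToMPI support structure used later by the numeric phase.
*/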
4143 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4144 {
4145   PetscErrorCode      ierr;
4146   Mat                 B_mpi;
4147   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4148   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4149   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4150   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4151   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4152   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4153   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4154   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4155   MPI_Status          *status;
4156   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4157   PetscBT             lnkbt;
4158   Mat_Merge_SeqsToMPI *merge;
4159   PetscContainer      container;
4160 
4161   PetscFunctionBegin;
4162   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4163 
4164   /* make sure it is a PETSc comm */
4165   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4166   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4167   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4168 
4169   ierr = PetscNew(&merge);CHKERRQ(ierr);
4170   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4171 
4172   /* determine row ownership */
4173   /*---------------------------------------------------------*/
4174   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4175   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4176   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4177   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4178   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4179   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4180   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4181 
4182   m      = merge->rowmap->n;
4183   owners = merge->rowmap->range;
4184 
4185   /* determine the number of messages to send, their lengths */
4186   /*---------------------------------------------------------*/
4187   len_s = merge->len_s;
4188 
4189   len          = 0; /* length of buf_si[] */
4190   merge->nsend = 0;
4191   for (proc=0; proc<size; proc++) {
4192     len_si[proc] = 0;
4193     if (proc == rank) {
4194       len_s[proc] = 0;
4195     } else {
4196       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4197       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros in rows to be sent to [proc] */
4198     }
4199     if (len_s[proc]) {
4200       merge->nsend++;
4201       nrows = 0;
4202       for (i=owners[proc]; i<owners[proc+1]; i++) {
4203         if (ai[i+1] > ai[i]) nrows++;
4204       }
4205       len_si[proc] = 2*(nrows+1);
4206       len         += len_si[proc];
4207     }
4208   }
4209 
4210   /* determine the number and length of messages to receive for ij-structure */
4211   /*-------------------------------------------------------------------------*/
4212   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4213   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4214 
4215   /* post the Irecv of j-structure */
4216   /*-------------------------------*/
4217   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4218   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4219 
4220   /* post the Isend of j-structure */
4221   /*--------------------------------*/
4222   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4223 
4224   for (proc=0, k=0; proc<size; proc++) {
4225     if (!len_s[proc]) continue;
4226     i    = owners[proc];
4227     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4228     k++;
4229   }
4230 
4231   /* receives and sends of j-structure are complete */
4232   /*------------------------------------------------*/
4233   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4234   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4235 
4236   /* send and recv i-structure */
4237   /*---------------------------*/
4238   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4239   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4240 
4241   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4242   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4243   for (proc=0,k=0; proc<size; proc++) {
4244     if (!len_s[proc]) continue;
4245     /* form outgoing message for i-structure:
4246          buf_si[0]:                 nrows to be sent
4247                [1:nrows]:           row index (global)
4248                [nrows+1:2*nrows+1]: i-structure index
4249     */
4250     /*-------------------------------------------*/
4251     nrows       = len_si[proc]/2 - 1;
4252     buf_si_i    = buf_si + nrows+1;
4253     buf_si[0]   = nrows;
4254     buf_si_i[0] = 0;
4255     nrows       = 0;
4256     for (i=owners[proc]; i<owners[proc+1]; i++) {
4257       anzi = ai[i+1] - ai[i];
4258       if (anzi) {
4259         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4260         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4261         nrows++;
4262       }
4263     }
4264     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4265     k++;
4266     buf_si += len_si[proc];
4267   }
4268 
4269   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4270   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4271 
4272   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4273   for (i=0; i<merge->nrecv; i++) {
4274     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4275   }
4276 
4277   ierr = PetscFree(len_si);CHKERRQ(ierr);
4278   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4279   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4280   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4281   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4282   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4283   ierr = PetscFree(status);CHKERRQ(ierr);
4284 
4285   /* compute a local seq matrix in each processor */
4286   /*----------------------------------------------*/
4287   /* allocate bi array and free space for accumulating nonzero column info */
4288   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4289   bi[0] = 0;
4290 
4291   /* create and initialize a linked list */
4292   nlnk = N+1;
4293   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4294 
4295   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4296   len  = ai[owners[rank+1]] - ai[owners[rank]];
4297   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4298 
4299   current_space = free_space;
4300 
4301   /* determine symbolic info for each local row */
4302   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4303 
4304   for (k=0; k<merge->nrecv; k++) {
4305     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4306     nrows       = *buf_ri_k[k];
4307     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4308     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4309   }
4310 
4311   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4312   len  = 0;
4313   for (i=0; i<m; i++) {
4314     bnzi = 0;
4315     /* add local non-zero cols of this proc's seqmat into lnk */
4316     arow  = owners[rank] + i;
4317     anzi  = ai[arow+1] - ai[arow];
4318     aj    = a->j + ai[arow];
4319     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4320     bnzi += nlnk;
4321     /* add received col data into lnk */
4322     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4323       if (i == *nextrow[k]) { /* i-th row */
4324         anzi  = *(nextai[k]+1) - *nextai[k];
4325         aj    = buf_rj[k] + *nextai[k];
4326         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4327         bnzi += nlnk;
4328         nextrow[k]++; nextai[k]++;
4329       }
4330     }
4331     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4332 
4333     /* if free space is not available, make more free space */
4334     if (current_space->local_remaining<bnzi) {
4335       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4336       nspacedouble++;
4337     }
4338     /* copy data into free space, then initialize lnk */
4339     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4340     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4341 
4342     current_space->array           += bnzi;
4343     current_space->local_used      += bnzi;
4344     current_space->local_remaining -= bnzi;
4345 
4346     bi[i+1] = bi[i] + bnzi;
4347   }
4348 
4349   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4350 
4351   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4352   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4353   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4354 
4355   /* create symbolic parallel matrix B_mpi */
4356   /*---------------------------------------*/
4357   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4358   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4359   if (n==PETSC_DECIDE) {
4360     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4361   } else {
4362     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4363   }
4364   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4365   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4366   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4367   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4368   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4369 
4370   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4371   B_mpi->assembled    = PETSC_FALSE;
4372   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4373   merge->bi           = bi;
4374   merge->bj           = bj;
4375   merge->buf_ri       = buf_ri;
4376   merge->buf_rj       = buf_rj;
4377   merge->coi          = NULL;
4378   merge->coj          = NULL;
4379   merge->owners_co    = NULL;
4380 
4381   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4382 
4383   /* attach the supporting struct to B_mpi for reuse */
4384   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4385   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4386   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4387   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4388   *mpimat = B_mpi;
4389 
4390   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4391   PetscFunctionReturn(0);
4392 }
4393 
4394 #undef __FUNCT__
4395 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4396 /*@C
4397       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential
4398                  matrices from each processor
4399 
4400     Collective on MPI_Comm
4401 
4402    Input Parameters:
4403 +    comm - the communicator the parallel matrix will live on
4404 .    seqmat - the input sequential matrix on each process
4405 .    m - number of local rows (or PETSC_DECIDE)
4406 .    n - number of local columns (or PETSC_DECIDE)
4407 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4408 
4409    Output Parameter:
4410 .    mpimat - the parallel matrix generated
4411 
4412     Level: advanced
4413 
4414    Notes:
4415      The dimensions of the sequential matrix on each process MUST be the same.
4416      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4417      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
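
   An illustrative sketch (assuming seqmat is a SeqAIJ matrix of identical dimensions on every
   process and error checking is abbreviated):
.vb
      Mat mpimat;
      ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
      /* ... change the numerical values (but not the nonzero pattern) of seqmat ... */
      ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
.ve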
4418 @*/
4419 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4420 {
4421   PetscErrorCode ierr;
4422   PetscMPIInt    size;
4423 
4424   PetscFunctionBegin;
4425   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4426   if (size == 1) {
4427     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4428     if (scall == MAT_INITIAL_MATRIX) {
4429       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4430     } else {
4431       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4432     }
4433     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4434     PetscFunctionReturn(0);
4435   }
4436   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4437   if (scall == MAT_INITIAL_MATRIX) {
4438     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4439   }
4440   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4441   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4442   PetscFunctionReturn(0);
4443 }
4444 
4445 #undef __FUNCT__
4446 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4447 /*@
4448      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4449           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4450           with MatGetSize()
4451 
4452     Not Collective
4453 
4454    Input Parameters:
4455 +    A - the matrix
4456 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4457 
4458    Output Parameter:
4459 .    A_loc - the local sequential matrix generated
4460 
4461     Level: developer
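   Example usage (an illustrative sketch; error checking abbreviated):
.vb
      Mat A_loc;
      ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      /* ... use A_loc ... */
      ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr); /* refresh the values after A has changed */
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
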
4462 
4463 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4464 
4465 @*/
4466 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4467 {
4468   PetscErrorCode ierr;
4469   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4470   Mat_SeqAIJ     *mat,*a,*b;
4471   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4472   MatScalar      *aa,*ba,*cam;
4473   PetscScalar    *ca;
4474   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4475   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4476   PetscBool      match;
4477   MPI_Comm       comm;
4478   PetscMPIInt    size;
4479 
4480   PetscFunctionBegin;
4481   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4482   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4483   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4484   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4485   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4486 
4487   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4488   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4489   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4490   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4491   aa = a->a; ba = b->a;
4492   if (scall == MAT_INITIAL_MATRIX) {
4493     if (size == 1) {
4494       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4495       PetscFunctionReturn(0);
4496     }
4497 
4498     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4499     ci[0] = 0;
4500     for (i=0; i<am; i++) {
4501       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4502     }
4503     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4504     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4505     k    = 0;
4506     for (i=0; i<am; i++) {
4507       ncols_o = bi[i+1] - bi[i];
4508       ncols_d = ai[i+1] - ai[i];
4509       /* off-diagonal portion of A */
4510       for (jo=0; jo<ncols_o; jo++) {
4511         col = cmap[*bj];
4512         if (col >= cstart) break;
4513         cj[k]   = col; bj++;
4514         ca[k++] = *ba++;
4515       }
4516       /* diagonal portion of A */
4517       for (j=0; j<ncols_d; j++) {
4518         cj[k]   = cstart + *aj++;
4519         ca[k++] = *aa++;
4520       }
4521       /* off-diagonal portion of A */
4522       for (j=jo; j<ncols_o; j++) {
4523         cj[k]   = cmap[*bj++];
4524         ca[k++] = *ba++;
4525       }
4526     }
4527     /* put together the new matrix */
4528     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4529     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4530     /* Since these are PETSc arrays, change flags to free them as necessary. */
4531     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4532     mat->free_a  = PETSC_TRUE;
4533     mat->free_ij = PETSC_TRUE;
4534     mat->nonew   = 0;
4535   } else if (scall == MAT_REUSE_MATRIX) {
4536     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4537     ci = mat->i; cj = mat->j; cam = mat->a;
4538     for (i=0; i<am; i++) {
4539       /* off-diagonal portion of A */
4540       ncols_o = bi[i+1] - bi[i];
4541       for (jo=0; jo<ncols_o; jo++) {
4542         col = cmap[*bj];
4543         if (col >= cstart) break;
4544         *cam++ = *ba++; bj++;
4545       }
4546       /* diagonal portion of A */
4547       ncols_d = ai[i+1] - ai[i];
4548       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4549       /* off-diagonal portion of A */
4550       for (j=jo; j<ncols_o; j++) {
4551         *cam++ = *ba++; bj++;
4552       }
4553     }
4554   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4555   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4556   PetscFunctionReturn(0);
4557 }
4558 
4559 #undef __FUNCT__
4560 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4561 /*@C
4562      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4563 
4564     Not Collective
4565 
4566    Input Parameters:
4567 +    A - the matrix
4568 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4569 -    row, col - index sets of rows and columns to extract (or NULL)
4570 
4571    Output Parameter:
4572 .    A_loc - the local sequential matrix generated
4573 
4574     Level: developer
4575 
4576 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4577 
4578 @*/
4579 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4580 {
4581   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4582   PetscErrorCode ierr;
4583   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4584   IS             isrowa,iscola;
4585   Mat            *aloc;
4586   PetscBool      match;
4587 
4588   PetscFunctionBegin;
4589   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4590   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4591   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4592   if (!row) {
4593     start = A->rmap->rstart; end = A->rmap->rend;
4594     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4595   } else {
4596     isrowa = *row;
4597   }
4598   if (!col) {
4599     start = A->cmap->rstart;
4600     cmap  = a->garray;
4601     nzA   = a->A->cmap->n;
4602     nzB   = a->B->cmap->n;
4603     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4604     ncols = 0;
4605     for (i=0; i<nzB; i++) {
4606       if (cmap[i] < start) idx[ncols++] = cmap[i];
4607       else break;
4608     }
4609     imark = i;
4610     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4611     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4612     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4613   } else {
4614     iscola = *col;
4615   }
4616   if (scall != MAT_INITIAL_MATRIX) {
4617     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4618     aloc[0] = *A_loc;
4619   }
4620   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4621   *A_loc = aloc[0];
4622   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4623   if (!row) {
4624     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4625   }
4626   if (!col) {
4627     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4628   }
4629   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4630   PetscFunctionReturn(0);
4631 }
4632 
4633 #undef __FUNCT__
4634 #define __FUNCT__ "MatGetBrowsOfAcols"
4635 /*@C
4636     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4637 
4638     Collective on Mat
4639 
4640    Input Parameters:
4641 +    A,B - the matrices in mpiaij format
4642 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4643 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4644 
4645    Output Parameters:
4646 +    rowb, colb - index sets of rows and columns of B to extract
4647 -    B_seq - the sequential matrix generated
4648 
4649     Level: developer
4650 
4651 @*/
4652 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4653 {
4654   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4655   PetscErrorCode ierr;
4656   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4657   IS             isrowb,iscolb;
4658   Mat            *bseq=NULL;
4659 
4660   PetscFunctionBegin;
4661   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4662     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4663   }
4664   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4665 
4666   if (scall == MAT_INITIAL_MATRIX) {
4667     start = A->cmap->rstart;
4668     cmap  = a->garray;
4669     nzA   = a->A->cmap->n;
4670     nzB   = a->B->cmap->n;
4671     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4672     ncols = 0;
4673     for (i=0; i<nzB; i++) {  /* row < local row index */
4674       if (cmap[i] < start) idx[ncols++] = cmap[i];
4675       else break;
4676     }
4677     imark = i;
4678     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4679     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4680     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4681     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4682   } else {
4683     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4684     isrowb  = *rowb; iscolb = *colb;
4685     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4686     bseq[0] = *B_seq;
4687   }
4688   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4689   *B_seq = bseq[0];
4690   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4691   if (!rowb) {
4692     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4693   } else {
4694     *rowb = isrowb;
4695   }
4696   if (!colb) {
4697     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4698   } else {
4699     *colb = iscolb;
4700   }
4701   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4702   PetscFunctionReturn(0);
4703 }
4704 
4705 #undef __FUNCT__
4706 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4707 /*
4708     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4709     of the OFF-DIAGONAL portion of local A
4710 
4711     Collective on Mat
4712 
4713    Input Parameters:
4714 +    A,B - the matrices in mpiaij format
4715 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4716 
4717    Output Parameters:
4718 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4719 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4720 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4721 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4722 
4723     Level: developer
4724 
4725 */
4726 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4727 {
4728   VecScatter_MPI_General *gen_to,*gen_from;
4729   PetscErrorCode         ierr;
4730   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4731   Mat_SeqAIJ             *b_oth;
4732   VecScatter             ctx =a->Mvctx;
4733   MPI_Comm               comm;
4734   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4735   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4736   PetscScalar            *rvalues,*svalues;
4737   MatScalar              *b_otha,*bufa,*bufA;
4738   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4739   MPI_Request            *rwaits = NULL,*swaits = NULL;
4740   MPI_Status             *sstatus,rstatus;
4741   PetscMPIInt            jj,size;
4742   PetscInt               *cols,sbs,rbs;
4743   PetscScalar            *vals;
4744 
4745   PetscFunctionBegin;
4746   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4747   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4748 
4749   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4750     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4751   }
4752   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4753   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4754 
4755   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4756   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4757   rvalues  = gen_from->values; /* holds the length of receiving row */
4758   svalues  = gen_to->values;   /* holds the length of sending row */
4759   nrecvs   = gen_from->n;
4760   nsends   = gen_to->n;
4761 
4762   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4763   srow    = gen_to->indices;    /* local row index to be sent */
4764   sstarts = gen_to->starts;
4765   sprocs  = gen_to->procs;
4766   sstatus = gen_to->sstatus;
4767   sbs     = gen_to->bs;
4768   rstarts = gen_from->starts;
4769   rprocs  = gen_from->procs;
4770   rbs     = gen_from->bs;
4771 
4772   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4773   if (scall == MAT_INITIAL_MATRIX) {
4774     /* i-array */
4775     /*---------*/
4776     /*  post receives */
4777     for (i=0; i<nrecvs; i++) {
4778       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4779       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4780       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4781     }
4782 
4783     /* pack the outgoing message */
4784     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4785 
4786     sstartsj[0] = 0;
4787     rstartsj[0] = 0;
4788     len         = 0; /* total length of j or a array to be sent */
4789     k           = 0;
4790     for (i=0; i<nsends; i++) {
4791       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4792       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4793       for (j=0; j<nrows; j++) {
4794         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4795         for (l=0; l<sbs; l++) {
4796           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4797 
4798           rowlen[j*sbs+l] = ncols;
4799 
4800           len += ncols;
4801           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4802         }
4803         k++;
4804       }
4805       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4806 
4807       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4808     }
4809     /* recvs and sends of i-array are completed */
4810     i = nrecvs;
4811     while (i--) {
4812       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4813     }
4814     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4815 
4816     /* allocate buffers for sending j and a arrays */
4817     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4818     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4819 
4820     /* create i-array of B_oth */
4821     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4822 
4823     b_othi[0] = 0;
4824     len       = 0; /* total length of j or a array to be received */
4825     k         = 0;
4826     for (i=0; i<nrecvs; i++) {
4827       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4828       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4829       for (j=0; j<nrows; j++) {
4830         b_othi[k+1] = b_othi[k] + rowlen[j];
4831         len        += rowlen[j]; k++;
4832       }
4833       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4834     }
4835 
4836     /* allocate space for j and a arrays of B_oth */
4837     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4838     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4839 
4840     /* j-array */
4841     /*---------*/
4842     /*  post receives of j-array */
4843     for (i=0; i<nrecvs; i++) {
4844       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4845       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4846     }
4847 
4848     /* pack the outgoing message j-array */
4849     k = 0;
4850     for (i=0; i<nsends; i++) {
4851       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4852       bufJ  = bufj+sstartsj[i];
4853       for (j=0; j<nrows; j++) {
4854         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4855         for (ll=0; ll<sbs; ll++) {
4856           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4857           for (l=0; l<ncols; l++) {
4858             *bufJ++ = cols[l];
4859           }
4860           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4861         }
4862       }
4863       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4864     }
4865 
4866     /* recvs and sends of j-array are completed */
4867     i = nrecvs;
4868     while (i--) {
4869       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4870     }
4871     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4872   } else if (scall == MAT_REUSE_MATRIX) {
4873     sstartsj = *startsj_s;
4874     rstartsj = *startsj_r;
4875     bufa     = *bufa_ptr;
4876     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4877     b_otha   = b_oth->a;
4878   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix B does not possess an object container");
4879 
4880   /* a-array */
4881   /*---------*/
4882   /*  post receives of a-array */
4883   for (i=0; i<nrecvs; i++) {
4884     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4885     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4886   }
4887 
4888   /* pack the outgoing message a-array */
4889   k = 0;
4890   for (i=0; i<nsends; i++) {
4891     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4892     bufA  = bufa+sstartsj[i];
4893     for (j=0; j<nrows; j++) {
4894       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4895       for (ll=0; ll<sbs; ll++) {
4896         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4897         for (l=0; l<ncols; l++) {
4898           *bufA++ = vals[l];
4899         }
4900         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4901       }
4902     }
4903     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4904   }
4905   /* recvs and sends of a-array are completed */
4906   i = nrecvs;
4907   while (i--) {
4908     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4909   }
4910   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4911   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4912 
4913   if (scall == MAT_INITIAL_MATRIX) {
4914     /* put together the new matrix */
4915     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4916 
4917     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4918     /* Since these are PETSc arrays, change flags to free them as necessary. */
4919     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4920     b_oth->free_a  = PETSC_TRUE;
4921     b_oth->free_ij = PETSC_TRUE;
4922     b_oth->nonew   = 0;
4923 
4924     ierr = PetscFree(bufj);CHKERRQ(ierr);
4925     if (!startsj_s || !bufa_ptr) {
4926       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4927       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4928     } else {
4929       *startsj_s = sstartsj;
4930       *startsj_r = rstartsj;
4931       *bufa_ptr  = bufa;
4932     }
4933   }
4934   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4935   PetscFunctionReturn(0);
4936 }
4937 
4938 #undef __FUNCT__
4939 #define __FUNCT__ "MatGetCommunicationStructs"
4940 /*@C
4941   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4942 
4943   Not Collective
4944 
4945   Input Parameter:
4946 . A - The matrix in mpiaij format
4947 
4948   Output Parameters:
4949 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4950 . colmap - A map from global column index to local index into lvec
4951 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4952 
4953   Level: developer
4954 
4955 @*/
4956 #if defined(PETSC_USE_CTABLE)
4957 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4958 #else
4959 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4960 #endif
4961 {
4962   Mat_MPIAIJ *a;
4963 
4964   PetscFunctionBegin;
4965   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4966   PetscValidPointer(lvec, 2);
4967   PetscValidPointer(colmap, 3);
4968   PetscValidPointer(multScatter, 4);
4969   a = (Mat_MPIAIJ*) A->data;
4970   if (lvec) *lvec = a->lvec;
4971   if (colmap) *colmap = a->colmap;
4972   if (multScatter) *multScatter = a->Mvctx;
4973   PetscFunctionReturn(0);
4974 }
4975 
4976 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4977 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4978 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4979 #if defined(PETSC_HAVE_ELEMENTAL)
4980 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4981 #endif
4982 
4983 #undef __FUNCT__
4984 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4985 /*
4986     Computes (B'*A')' since computing A*B directly is untenable
4987 
4988                n                       p                          p
4989         (              )       (              )         (                  )
4990       m (      A       )  *  n (       B      )   =   m (         C        )
4991         (              )       (              )         (                  )
4992 
4993 */
4994 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4995 {
4996   PetscErrorCode ierr;
4997   Mat            At,Bt,Ct;
4998 
4999   PetscFunctionBegin;
5000   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5001   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5002   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5003   ierr = MatDestroy(&At);CHKERRQ(ierr);
5004   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5005   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5006   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5007   PetscFunctionReturn(0);
5008 }
5009 
5010 #undef __FUNCT__
5011 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5012 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5013 {
5014   PetscErrorCode ierr;
5015   PetscInt       m=A->rmap->n,n=B->cmap->n;
5016   Mat            Cmat;
5017 
5018   PetscFunctionBegin;
5019   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5020   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5021   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5022   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5023   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5024   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5025   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5026   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5027 
5028   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5029 
5030   *C = Cmat;
5031   PetscFunctionReturn(0);
5032 }
5033 
5034 /* ----------------------------------------------------------------*/
5035 #undef __FUNCT__
5036 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5037 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5038 {
5039   PetscErrorCode ierr;
5040 
5041   PetscFunctionBegin;
5042   if (scall == MAT_INITIAL_MATRIX) {
5043     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5044     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5045     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5046   }
5047   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5048   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5049   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5050   PetscFunctionReturn(0);
5051 }
5052 
5053 /*MC
5054    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5055 
5056    Options Database Keys:
5057 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5058 
5059   Level: beginner
5060 
5061 .seealso: MatCreateAIJ()
5062 M*/
5063 
5064 #undef __FUNCT__
5065 #define __FUNCT__ "MatCreate_MPIAIJ"
5066 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5067 {
5068   Mat_MPIAIJ     *b;
5069   PetscErrorCode ierr;
5070   PetscMPIInt    size;
5071 
5072   PetscFunctionBegin;
5073   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5074 
5075   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5076   B->data       = (void*)b;
5077   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5078   B->assembled  = PETSC_FALSE;
5079   B->insertmode = NOT_SET_VALUES;
5080   b->size       = size;
5081 
5082   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5083 
5084   /* build cache for off array entries formed */
5085   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5086 
5087   b->donotstash  = PETSC_FALSE;
5088   b->colmap      = 0;
5089   b->garray      = 0;
5090   b->roworiented = PETSC_TRUE;
5091 
5092   /* stuff used for matrix vector multiply */
5093   b->lvec  = NULL;
5094   b->Mvctx = NULL;
5095 
5096   /* stuff for MatGetRow() */
5097   b->rowindices   = 0;
5098   b->rowvalues    = 0;
5099   b->getrowactive = PETSC_FALSE;
5100 
5101   /* flexible pointer used in CUSP/CUSPARSE classes */
5102   b->spptr = NULL;
5103 
5104   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5105   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5106   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5107   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5108   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5109   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5110   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5111   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5112   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5113   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5114 #if defined(PETSC_HAVE_ELEMENTAL)
5115   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5116 #endif
5117   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5118   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5119   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5120   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5121   PetscFunctionReturn(0);
5122 }
5123 
5124 #undef __FUNCT__
5125 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5126 /*@C
5127      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5128          and "off-diagonal" part of the matrix in CSR format.
5129 
5130    Collective on MPI_Comm
5131 
5132    Input Parameters:
5133 +  comm - MPI communicator
5134 .  m - number of local rows (cannot be PETSC_DECIDE)
5135 .  n - number of local columns; this should be the same as the local size used in creating the
5136        x vector for the matrix-vector product y = Ax (or use PETSC_DECIDE to have it
5137        calculated if N is given). For square matrices n is almost always m.
5138 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5139 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5140 .   i - row indices for "diagonal" portion of matrix
5141 .   j - column indices for "diagonal" portion of matrix
5142 .   a - matrix values for "diagonal" portion of matrix
5143 .   oi - row indices for "off-diagonal" portion of matrix
5144 .   oj - column indices for "off-diagonal" portion of matrix
5145 -   oa - matrix values for "off-diagonal" portion of matrix
5146 
5147    Output Parameter:
5148 .   mat - the matrix
5149 
5150    Level: advanced
5151 
5152    Notes:
5153        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5154        must free these arrays once the matrix has been destroyed, and not before.
5155 
5156        The i, j, oi, and oj indices are zero based
5157 
5158        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5159 
5160        This sets local rows and cannot be used to set off-processor values.
5161 
5162        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5163        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5164        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5165        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5166        keep track of the underlying arrays. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5167        communication if it is known that only local entries will be set.
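
   Example Usage:
       A minimal sketch of a call, assuming the application has already built the six CSR arrays described above
       (here m, n, i, j, a, oi, oj, and oa are placeholders for that user-provided data):
.vb
       Mat            A;
       PetscErrorCode ierr;
       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                             i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
       /* ... use A ...; the six arrays may only be freed after MatDestroy(&A) */
.ve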
5168 
5169 .keywords: matrix, aij, compressed row, sparse, parallel
5170 
5171 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5172           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5173 @*/
5174 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5175 {
5176   PetscErrorCode ierr;
5177   Mat_MPIAIJ     *maij;
5178 
5179   PetscFunctionBegin;
5180   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5181   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5182   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5183   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5184   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5185   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5186   maij = (Mat_MPIAIJ*) (*mat)->data;
5187 
5188   (*mat)->preallocated = PETSC_TRUE;
5189 
5190   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5191   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5192 
5193   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5194   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5195 
5196   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5197   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5198   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5199   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5200 
5201   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5202   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5203   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5204   PetscFunctionReturn(0);
5205 }
5206 
5207 /*
5208     Special version for direct calls from Fortran
5209 */
5210 #include <petsc/private/fortranimpl.h>
5211 
5212 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5213 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5214 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5215 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5216 #endif
5217 
5218 /* Change these macros so they can be used in a void function */
5219 #undef CHKERRQ
5220 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5221 #undef SETERRQ2
5222 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5223 #undef SETERRQ3
5224 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5225 #undef SETERRQ
5226 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5227 
5228 #undef __FUNCT__
5229 #define __FUNCT__ "matsetvaluesmpiaij_"
5230 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5231 {
5232   Mat            mat  = *mmat;
5233   PetscInt       m    = *mm, n = *mn;
5234   InsertMode     addv = *maddv;
5235   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5236   PetscScalar    value;
5237   PetscErrorCode ierr;
5238 
5239   MatCheckPreallocated(mat,1);
5240   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5241 
5242 #if defined(PETSC_USE_DEBUG)
5243   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5244 #endif
5245   {
5246     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5247     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5248     PetscBool roworiented = aij->roworiented;
5249 
5250     /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5251     Mat        A                 = aij->A;
5252     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5253     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5254     MatScalar  *aa               = a->a;
5255     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5256     Mat        B                 = aij->B;
5257     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5258     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5259     MatScalar  *ba               = b->a;
5260 
5261     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5262     PetscInt  nonew = a->nonew;
5263     MatScalar *ap1,*ap2;
5264 
5265     PetscFunctionBegin;
5266     for (i=0; i<m; i++) {
5267       if (im[i] < 0) continue;
5268 #if defined(PETSC_USE_DEBUG)
5269       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5270 #endif
5271       if (im[i] >= rstart && im[i] < rend) {
5272         row      = im[i] - rstart;
5273         lastcol1 = -1;
5274         rp1      = aj + ai[row];
5275         ap1      = aa + ai[row];
5276         rmax1    = aimax[row];
5277         nrow1    = ailen[row];
5278         low1     = 0;
5279         high1    = nrow1;
5280         lastcol2 = -1;
5281         rp2      = bj + bi[row];
5282         ap2      = ba + bi[row];
5283         rmax2    = bimax[row];
5284         nrow2    = bilen[row];
5285         low2     = 0;
5286         high2    = nrow2;
5287 
5288         for (j=0; j<n; j++) {
5289           if (roworiented) value = v[i*n+j];
5290           else value = v[i+j*m];
5291           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5292           if (in[j] >= cstart && in[j] < cend) {
5293             col = in[j] - cstart;
5294             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5295           } else if (in[j] < 0) continue;
5296 #if defined(PETSC_USE_DEBUG)
5297           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5298 #endif
5299           else {
5300             if (mat->was_assembled) {
5301               if (!aij->colmap) {
5302                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5303               }
5304 #if defined(PETSC_USE_CTABLE)
5305               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5306               col--;
5307 #else
5308               col = aij->colmap[in[j]] - 1;
5309 #endif
5310               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5311                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5312                 col  =  in[j];
5313                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5314                 B     = aij->B;
5315                 b     = (Mat_SeqAIJ*)B->data;
5316                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; /* refresh ba before it is used below: MatDisAssemble_MPIAIJ() replaced B and its value array */
5317                 rp2   = bj + bi[row];
5318                 ap2   = ba + bi[row];
5319                 rmax2 = bimax[row];
5320                 nrow2 = bilen[row];
5321                 low2  = 0;
5322                 high2 = nrow2;
5323                 bm    = aij->B->rmap->n;
5325               }
5326             } else col = in[j];
5327             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5328           }
5329         }
5330       } else if (!aij->donotstash) {
5331         if (roworiented) {
5332           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5333         } else {
5334           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5335         }
5336       }
5337     }
5338   }
5339   PetscFunctionReturnVoid();
5340 }
5341 
5342