xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 9b2437101ac1db3c201d7be9c485cbee0b1dde4b)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise.  As a result, for single process communicators,
13    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
14    for communicators controlling multiple processes.  It is recommended that you call both of
15    the above preallocation routines for simplicity.
16 
17    Options Database Keys:
18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19 
20   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The type also automatically switches over to use inodes when
21    enough exist.
22 
23   Level: beginner
24 
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
26 M*/
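/*
   Example of the recommendation above (a minimal sketch, not taken from this file):
   comm, M, N and the nonzero estimates d_nz/o_nz are placeholders supplied by the
   caller; the preallocation call that does not match the communicator size is ignored.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,d_nz,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
*/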
27 
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30 
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34   for communicators controlling multiple processes.  It is recommended that you call both of
35   the above preallocation routines for simplicity.
36 
37    Options Database Keys:
38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39 
40   Level: beginner
41 
42 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
48 {
49   PetscErrorCode  ierr;
50   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
51   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
52   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
53   const PetscInt  *ia,*ib;
54   const MatScalar *aa,*bb;
55   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
56   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
57 
58   PetscFunctionBegin;
59   *keptrows = 0;
60   ia        = a->i;
61   ib        = b->i;
62   for (i=0; i<m; i++) {
63     na = ia[i+1] - ia[i];
64     nb = ib[i+1] - ib[i];
65     if (!na && !nb) {
66       cnt++;
67       goto ok1;
68     }
69     aa = a->a + ia[i];
70     for (j=0; j<na; j++) {
71       if (aa[j] != 0.0) goto ok1;
72     }
73     bb = b->a + ib[i];
74     for (j=0; j <nb; j++) {
75       if (bb[j] != 0.0) goto ok1;
76     }
77     cnt++;
78 ok1:;
79   }
80   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
81   if (!n0rows) PetscFunctionReturn(0);
82   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
83   cnt  = 0;
84   for (i=0; i<m; i++) {
85     na = ia[i+1] - ia[i];
86     nb = ib[i+1] - ib[i];
87     if (!na && !nb) continue;
88     aa = a->a + ia[i];
89     for (j=0; j<na;j++) {
90       if (aa[j] != 0.0) {
91         rows[cnt++] = rstart + i;
92         goto ok2;
93       }
94     }
95     bb = b->a + ib[i];
96     for (j=0; j<nb; j++) {
97       if (bb[j] != 0.0) {
98         rows[cnt++] = rstart + i;
99         goto ok2;
100       }
101     }
102 ok2:;
103   }
104   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
110 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
111 {
112   PetscErrorCode    ierr;
113   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
114 
115   PetscFunctionBegin;
116   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
117     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
118   } else {
119     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
128 {
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
130   PetscErrorCode ierr;
131   PetscInt       i,rstart,nrows,*rows;
132 
133   PetscFunctionBegin;
134   *zrows = NULL;
135   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
136   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
137   for (i=0; i<nrows; i++) rows[i] += rstart;
138   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
145 {
146   PetscErrorCode ierr;
147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
148   PetscInt       i,n,*garray = aij->garray;
149   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
150   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
151   PetscReal      *work;
152 
153   PetscFunctionBegin;
154   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
155   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
156   if (type == NORM_2) {
157     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
158       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
159     }
160     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
161       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
162     }
163   } else if (type == NORM_1) {
164     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
165       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
166     }
167     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
168       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
169     }
170   } else if (type == NORM_INFINITY) {
171     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
172       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
173     }
174     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
175       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
176     }
177 
178   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
179   if (type == NORM_INFINITY) {
180     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
181   } else {
182     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
183   }
184   ierr = PetscFree(work);CHKERRQ(ierr);
185   if (type == NORM_2) {
186     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
194 {
195   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
196   IS              sis,gis;
197   PetscErrorCode  ierr;
198   const PetscInt  *isis,*igis;
199   PetscInt        n,*iis,nsis,ngis,rstart,i;
200 
201   PetscFunctionBegin;
202   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
203   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
204   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
205   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
206   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
207   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
208 
209   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
210   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
211   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
212   n    = ngis + nsis;
213   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
214   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
215   for (i=0; i<n; i++) iis[i] += rstart;
216   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
217 
218   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
219   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
220   ierr = ISDestroy(&sis);CHKERRQ(ierr);
221   ierr = ISDestroy(&gis);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
225 #undef __FUNCT__
226 #define __FUNCT__ "MatDistribute_MPIAIJ"
227 /*
228     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
229     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
230 
231     Only for square matrices
232 
233     Used by a preconditioner, hence PETSC_EXTERN
234 */
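/*
    Hypothetical caller sketch (not part of this file): gmat is a SeqAIJ matrix that is
    significant on rank 0, m is the number of local rows each rank wants, and dist is
    the resulting parallel matrix; a second call with MAT_REUSE_MATRIX only re-sends the
    numerical values.

      Mat dist;
      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/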
235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
236 {
237   PetscMPIInt    rank,size;
238   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
239   PetscErrorCode ierr;
240   Mat            mat;
241   Mat_SeqAIJ     *gmata;
242   PetscMPIInt    tag;
243   MPI_Status     status;
244   PetscBool      aij;
245   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
246 
247   PetscFunctionBegin;
248   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
249   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
250   if (!rank) {
251     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
252     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
253   }
254   if (reuse == MAT_INITIAL_MATRIX) {
255     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
256     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
257     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
258     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
259     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
260     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
261     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
262     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
263     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
264 
265     rowners[0] = 0;
266     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
267     rstart = rowners[rank];
268     rend   = rowners[rank+1];
269     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
270     if (!rank) {
271       gmata = (Mat_SeqAIJ*) gmat->data;
272       /* send row lengths to all processors */
273       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
274       for (i=1; i<size; i++) {
275         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
276       }
277       /* determine the number of diagonal and off-diagonal entries in each row */
278       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
279       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
280       jj   = 0;
281       for (i=0; i<m; i++) {
282         for (j=0; j<dlens[i]; j++) {
283           if (gmata->j[jj] < rstart) ld[i]++;
284           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
285           jj++;
286         }
287       }
288       /* send column indices to other processes */
289       for (i=1; i<size; i++) {
290         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
291         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
292         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293       }
294 
295       /* send numerical values to other processes */
296       for (i=1; i<size; i++) {
297         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
298         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
299       }
300       gmataa = gmata->a;
301       gmataj = gmata->j;
302 
303     } else {
304       /* receive row lengths */
305       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
306       /* receive column indices */
307       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
308       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
309       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* determine the number of diagonal and off-diagonal entries in each row */
311       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
312       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
313       jj   = 0;
314       for (i=0; i<m; i++) {
315         for (j=0; j<dlens[i]; j++) {
316           if (gmataj[jj] < rstart) ld[i]++;
317           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
318           jj++;
319         }
320       }
321       /* receive numerical values */
322       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* set preallocation */
326     for (i=0; i<m; i++) {
327       dlens[i] -= olens[i];
328     }
329     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
330     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
331 
332     for (i=0; i<m; i++) {
333       dlens[i] += olens[i];
334     }
335     cnt = 0;
336     for (i=0; i<m; i++) {
337       row  = rstart + i;
338       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
339       cnt += dlens[i];
340     }
341     if (rank) {
342       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
343     }
344     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
345     ierr = PetscFree(rowners);CHKERRQ(ierr);
346 
347     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
348 
349     *inmat = mat;
350   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
351     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
352     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
353     mat  = *inmat;
354     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
355     if (!rank) {
356       /* send numerical values to other processes */
357       gmata  = (Mat_SeqAIJ*) gmat->data;
358       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
359       gmataa = gmata->a;
360       for (i=1; i<size; i++) {
361         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
362         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
363       }
364       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
365     } else {
366       /* receive numerical values from process 0 */
367       nz   = Ad->nz + Ao->nz;
368       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
369       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
370     }
371     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
372     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
373     ad = Ad->a;
374     ao = Ao->a;
375     if (mat->rmap->n) {
376       i  = 0;
377       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
378       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
379     }
380     for (i=1; i<mat->rmap->n; i++) {
381       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     i--;
385     if (mat->rmap->n) {
386       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
387     }
388     if (rank) {
389       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
390     }
391   }
392   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
393   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 /*
398   Local utility routine that creates a mapping from the global column
399   number to the local number in the off-diagonal part of the local
400   storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
401   a slightly higher hash table cost; without it, it is not scalable (each processor
402   has an order N integer array, but access is fast).
403 */
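/*
   Lookup sketch showing how the colmap is consumed once built (this mirrors the code in
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below; gcol and lcol are illustrative
   names for a global column number and its local off-diagonal index):

     #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;                               where lcol < 0 means gcol is not present
     #else
       lcol = aij->colmap[gcol] - 1;
     #endif
*/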
404 #undef __FUNCT__
405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
407 {
408   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscInt       n = aij->B->cmap->n,i;
411 
412   PetscFunctionBegin;
413   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
414 #if defined(PETSC_USE_CTABLE)
415   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
416   for (i=0; i<n; i++) {
417     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
418   }
419 #else
420   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
421   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
422   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
423 #endif
424   PetscFunctionReturn(0);
425 }
426 
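/*
   The two macros below insert a single (row,col,value) entry into the local diagonal
   block (the "A" matrix) or the off-diagonal block (the "B" matrix), both stored in
   SeqAIJ format: a short bisection plus linear scan locates the column within the row,
   an existing entry is added to or overwritten, and otherwise the row is grown
   (reallocating via MatSeqXAIJReallocateAIJ() when full) and later entries are shifted
   up to make room for the new column.
*/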
427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
428 { \
429     if (col <= lastcol1)  low1 = 0;     \
430     else                 high1 = nrow1; \
431     lastcol1 = col;\
432     while (high1-low1 > 5) { \
433       t = (low1+high1)/2; \
434       if (rp1[t] > col) high1 = t; \
435       else              low1  = t; \
436     } \
437       for (_i=low1; _i<high1; _i++) { \
438         if (rp1[_i] > col) break; \
439         if (rp1[_i] == col) { \
440           if (addv == ADD_VALUES) ap1[_i] += value;   \
441           else                    ap1[_i] = value; \
442           goto a_noinsert; \
443         } \
444       }  \
445       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
446       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
447       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
448       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
449       N = nrow1++ - 1; a->nz++; high1++; \
450       /* shift up all the later entries in this row */ \
451       for (ii=N; ii>=_i; ii--) { \
452         rp1[ii+1] = rp1[ii]; \
453         ap1[ii+1] = ap1[ii]; \
454       } \
455       rp1[_i] = col;  \
456       ap1[_i] = value;  \
457       A->nonzerostate++;\
458       a_noinsert: ; \
459       ailen[row] = nrow1; \
460 }
461 
462 
463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
464   { \
465     if (col <= lastcol2) low2 = 0;                        \
466     else high2 = nrow2;                                   \
467     lastcol2 = col;                                       \
468     while (high2-low2 > 5) {                              \
469       t = (low2+high2)/2;                                 \
470       if (rp2[t] > col) high2 = t;                        \
471       else             low2  = t;                         \
472     }                                                     \
473     for (_i=low2; _i<high2; _i++) {                       \
474       if (rp2[_i] > col) break;                           \
475       if (rp2[_i] == col) {                               \
476         if (addv == ADD_VALUES) ap2[_i] += value;         \
477         else                    ap2[_i] = value;          \
478         goto b_noinsert;                                  \
479       }                                                   \
480     }                                                     \
481     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
482     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
483     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
484     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
485     N = nrow2++ - 1; b->nz++; high2++;                    \
486     /* shift up all the later entries in this row */      \
487     for (ii=N; ii>=_i; ii--) {                            \
488       rp2[ii+1] = rp2[ii];                                \
489       ap2[ii+1] = ap2[ii];                                \
490     }                                                     \
491     rp2[_i] = col;                                        \
492     ap2[_i] = value;                                      \
493     B->nonzerostate++;                                    \
494     b_noinsert: ;                                         \
495     bilen[row] = nrow2;                                   \
496   }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
501 {
502   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
503   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
504   PetscErrorCode ierr;
505   PetscInt       l,*garray = mat->garray,diag;
506 
507   PetscFunctionBegin;
508   /* code only works for square matrices A */
509 
510   /* find size of row to the left of the diagonal part */
511   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
512   row  = row - diag;
513   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
514     if (garray[b->j[b->i[row]+l]] > diag) break;
515   }
516   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* diagonal part */
519   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
520 
521   /* right of diagonal part */
522   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatSetValues_MPIAIJ"
528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
529 {
530   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
531   PetscScalar    value;
532   PetscErrorCode ierr;
533   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
534   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
535   PetscBool      roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat        A                 = aij->A;
539   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
540   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
541   MatScalar  *aa               = a->a;
542   PetscBool  ignorezeroentries = a->ignorezeroentries;
543   Mat        B                 = aij->B;
544   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
545   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
546   MatScalar  *ba               = b->a;
547 
548   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
549   PetscInt  nonew;
550   MatScalar *ap1,*ap2;
551 
552   PetscFunctionBegin;
553   for (i=0; i<m; i++) {
554     if (im[i] < 0) continue;
555 #if defined(PETSC_USE_DEBUG)
556     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
557 #endif
558     if (im[i] >= rstart && im[i] < rend) {
559       row      = im[i] - rstart;
560       lastcol1 = -1;
561       rp1      = aj + ai[row];
562       ap1      = aa + ai[row];
563       rmax1    = aimax[row];
564       nrow1    = ailen[row];
565       low1     = 0;
566       high1    = nrow1;
567       lastcol2 = -1;
568       rp2      = bj + bi[row];
569       ap2      = ba + bi[row];
570       rmax2    = bimax[row];
571       nrow2    = bilen[row];
572       low2     = 0;
573       high2    = nrow2;
574 
575       for (j=0; j<n; j++) {
576         if (roworiented) value = v[i*n+j];
577         else             value = v[i+j*m];
578         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
579         if (in[j] >= cstart && in[j] < cend) {
580           col   = in[j] - cstart;
581           nonew = a->nonew;
582           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
583         } else if (in[j] < 0) continue;
584 #if defined(PETSC_USE_DEBUG)
585         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
586 #endif
587         else {
588           if (mat->was_assembled) {
589             if (!aij->colmap) {
590               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
591             }
592 #if defined(PETSC_USE_CTABLE)
593             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
594             col--;
595 #else
596             col = aij->colmap[in[j]] - 1;
597 #endif
598             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
599               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
600               col  =  in[j];
601               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
602               B     = aij->B;
603               b     = (Mat_SeqAIJ*)B->data;
604               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
605               rp2   = bj + bi[row];
606               ap2   = ba + bi[row];
607               rmax2 = bimax[row];
608               nrow2 = bilen[row];
609               low2  = 0;
610               high2 = nrow2;
611               bm    = aij->B->rmap->n;
612               ba    = b->a;
613             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614           } else col = in[j];
615           nonew = b->nonew;
616           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
617         }
618       }
619     } else {
620       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
621       if (!aij->donotstash) {
622         mat->assembled = PETSC_FALSE;
623         if (roworiented) {
624           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
625         } else {
626           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
627         }
628       }
629     }
630   }
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatGetValues_MPIAIJ"
636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
641   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
642 
643   PetscFunctionBegin;
644   for (i=0; i<m; i++) {
645     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
646     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
647     if (idxm[i] >= rstart && idxm[i] < rend) {
648       row = idxm[i] - rstart;
649       for (j=0; j<n; j++) {
650         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
651         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
652         if (idxn[j] >= cstart && idxn[j] < cend) {
653           col  = idxn[j] - cstart;
654           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
655         } else {
656           if (!aij->colmap) {
657             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
658           }
659 #if defined(PETSC_USE_CTABLE)
660           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
661           col--;
662 #else
663           col = aij->colmap[idxn[j]] - 1;
664 #endif
665           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
666           else {
667             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
668           }
669         }
670       }
671     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
681 {
682   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
683   PetscErrorCode ierr;
684   PetscInt       nstash,reallocs;
685 
686   PetscFunctionBegin;
687   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
688 
689   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
690   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
691   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
694 
695 #undef __FUNCT__
696 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
697 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
698 {
699   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
700   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
701   PetscErrorCode ierr;
702   PetscMPIInt    n;
703   PetscInt       i,j,rstart,ncols,flg;
704   PetscInt       *row,*col;
705   PetscBool      other_disassembled;
706   PetscScalar    *val;
707 
708   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
709 
710   PetscFunctionBegin;
711   if (!aij->donotstash && !mat->nooffprocentries) {
712     while (1) {
713       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
714       if (!flg) break;
715 
716       for (i=0; i<n; ) {
717         /* Now identify the consecutive vals belonging to the same row */
718         for (j=i,rstart=row[j]; j<n; j++) {
719           if (row[j] != rstart) break;
720         }
721         if (j < n) ncols = j-i;
722         else       ncols = n-i;
723         /* Now assemble all these values with a single function call */
724         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
725 
726         i = j;
727       }
728     }
729     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
730   }
731   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
732   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
733 
734   /* determine if any processor has disassembled; if so we must
735      also disassemble ourselves, in order that we may reassemble. */
736   /*
737      if nonzero structure of submatrix B cannot change then we know that
738      no processor disassembled thus we can skip this stuff
739   */
740   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
741     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
742     if (mat->was_assembled && !other_disassembled) {
743       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
744     }
745   }
746   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
747     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
748   }
749   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
750   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
751   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
752 
753   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
754 
755   aij->rowvalues = 0;
756 
757   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
758   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
759 
760   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
761   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
762     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
763     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
764   }
765   PetscFunctionReturn(0);
766 }
767 
768 #undef __FUNCT__
769 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
771 {
772   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
773   PetscErrorCode ierr;
774 
775   PetscFunctionBegin;
776   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
777   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 
781 #undef __FUNCT__
782 #define __FUNCT__ "MatZeroRows_MPIAIJ"
783 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
784 {
785   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
786   PetscInt      *lrows;
787   PetscInt       r, len;
788   PetscErrorCode ierr;
789 
790   PetscFunctionBegin;
791   /* get locally owned rows */
792   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
793   /* fix right hand side if needed */
794   if (x && b) {
795     const PetscScalar *xx;
796     PetscScalar       *bb;
797 
798     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
799     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
800     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
801     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
802     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
803   }
804   /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
805   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
806   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
807     PetscBool cong;
808     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
809     if (cong) A->congruentlayouts = 1;
810     else      A->congruentlayouts = 0;
811   }
812   if ((diag != 0.0) && A->congruentlayouts) {
813     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
814   } else if (diag != 0.0) {
815     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
816     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
817     for (r = 0; r < len; ++r) {
818       const PetscInt row = lrows[r] + A->rmap->rstart;
819       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
820     }
821     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
822     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
823   } else {
824     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
825   }
826   ierr = PetscFree(lrows);CHKERRQ(ierr);
827 
828   /* only change matrix nonzero state if pattern was allowed to be changed */
829   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
830     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
831     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
832   }
833   PetscFunctionReturn(0);
834 }
835 
836 #undef __FUNCT__
837 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
839 {
840   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
841   PetscErrorCode    ierr;
842   PetscMPIInt       n = A->rmap->n;
843   PetscInt          i,j,r,m,p = 0,len = 0;
844   PetscInt          *lrows,*owners = A->rmap->range;
845   PetscSFNode       *rrows;
846   PetscSF           sf;
847   const PetscScalar *xx;
848   PetscScalar       *bb,*mask;
849   Vec               xmask,lmask;
850   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
851   const PetscInt    *aj, *ii,*ridx;
852   PetscScalar       *aa;
853 
854   PetscFunctionBegin;
855   /* Create SF where leaves are input rows and roots are owned rows */
856   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
857   for (r = 0; r < n; ++r) lrows[r] = -1;
858   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
859   for (r = 0; r < N; ++r) {
860     const PetscInt idx   = rows[r];
861     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
862     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
863       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
864     }
865     rrows[r].rank  = p;
866     rrows[r].index = rows[r] - owners[p];
867   }
868   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
869   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
870   /* Collect flags for rows to be zeroed */
871   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
872   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
873   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
874   /* Compress and put in row numbers */
875   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
876   /* zero diagonal part of matrix */
877   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
878   /* handle off diagonal part of matrix */
879   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
880   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
881   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
882   for (i=0; i<len; i++) bb[lrows[i]] = 1;
883   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
884   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
885   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
887   if (x) {
888     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
889     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
890     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
891     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
892   }
893   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
894   /* remove zeroed rows of off diagonal matrix */
895   ii = aij->i;
896   for (i=0; i<len; i++) {
897     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
898   }
899   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
900   if (aij->compressedrow.use) {
901     m    = aij->compressedrow.nrows;
902     ii   = aij->compressedrow.i;
903     ridx = aij->compressedrow.rindex;
904     for (i=0; i<m; i++) {
905       n  = ii[i+1] - ii[i];
906       aj = aij->j + ii[i];
907       aa = aij->a + ii[i];
908 
909       for (j=0; j<n; j++) {
910         if (PetscAbsScalar(mask[*aj])) {
911           if (b) bb[*ridx] -= *aa*xx[*aj];
912           *aa = 0.0;
913         }
914         aa++;
915         aj++;
916       }
917       ridx++;
918     }
919   } else { /* do not use compressed row format */
920     m = l->B->rmap->n;
921     for (i=0; i<m; i++) {
922       n  = ii[i+1] - ii[i];
923       aj = aij->j + ii[i];
924       aa = aij->a + ii[i];
925       for (j=0; j<n; j++) {
926         if (PetscAbsScalar(mask[*aj])) {
927           if (b) bb[i] -= *aa*xx[*aj];
928           *aa = 0.0;
929         }
930         aa++;
931         aj++;
932       }
933     }
934   }
935   if (x) {
936     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
937     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
938   }
939   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
940   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
941   ierr = PetscFree(lrows);CHKERRQ(ierr);
942 
943   /* only change matrix nonzero state if pattern was allowed to be changed */
944   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
945     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
946     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
947   }
948   PetscFunctionReturn(0);
949 }
950 
951 #undef __FUNCT__
952 #define __FUNCT__ "MatMult_MPIAIJ"
953 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
954 {
955   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
956   PetscErrorCode ierr;
957   PetscInt       nt;
958 
959   PetscFunctionBegin;
960   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
961   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
962   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
963   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
964   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
965   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
966   PetscFunctionReturn(0);
967 }
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
971 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
972 {
973   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
974   PetscErrorCode ierr;
975 
976   PetscFunctionBegin;
977   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatMultAdd_MPIAIJ"
983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
984 {
985   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
986   PetscErrorCode ierr;
987 
988   PetscFunctionBegin;
989   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
990   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
991   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
993   PetscFunctionReturn(0);
994 }
995 
996 #undef __FUNCT__
997 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
998 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
999 {
1000   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1001   PetscErrorCode ierr;
1002   PetscBool      merged;
1003 
1004   PetscFunctionBegin;
1005   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1006   /* do nondiagonal part */
1007   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1008   if (!merged) {
1009     /* send it on its way */
1010     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1011     /* do local part */
1012     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1013     /* receive remote parts: note this assumes the values are not actually */
1014     /* added into yy until the next line */
1015     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1016   } else {
1017     /* do local part */
1018     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1019     /* send it on its way */
1020     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1021     /* values actually were received in the Begin() but we need to call this nop */
1022     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1023   }
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1029 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1030 {
1031   MPI_Comm       comm;
1032   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1033   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1034   IS             Me,Notme;
1035   PetscErrorCode ierr;
1036   PetscInt       M,N,first,last,*notme,i;
1037   PetscMPIInt    size;
1038 
1039   PetscFunctionBegin;
1040   /* Easy test: the diagonal blocks must be transposes of each other */
1041   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1042   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1043   if (!*f) PetscFunctionReturn(0);
1044   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1045   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1046   if (size == 1) PetscFunctionReturn(0);
1047 
1048   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1049   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1050   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1051   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1052   for (i=0; i<first; i++) notme[i] = i;
1053   for (i=last; i<M; i++) notme[i-last+first] = i;
1054   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1055   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1056   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1057   Aoff = Aoffs[0];
1058   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1059   Boff = Boffs[0];
1060   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1061   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1062   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1063   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1064   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1065   ierr = PetscFree(notme);CHKERRQ(ierr);
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1071 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1072 {
1073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1074   PetscErrorCode ierr;
1075 
1076   PetscFunctionBegin;
1077   /* do nondiagonal part */
1078   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1079   /* send it on its way */
1080   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1081   /* do local part */
1082   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1083   /* receive remote parts */
1084   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 /*
1089   This only works correctly for square matrices where the subblock A->A is the
1090    diagonal block
1091 */
1092 #undef __FUNCT__
1093 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1094 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1095 {
1096   PetscErrorCode ierr;
1097   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1098 
1099   PetscFunctionBegin;
1100   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1101   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1102   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatScale_MPIAIJ"
1108 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1109 {
1110   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1111   PetscErrorCode ierr;
1112 
1113   PetscFunctionBegin;
1114   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1115   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatDestroy_MPIAIJ"
1121 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1122 {
1123   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1124   PetscErrorCode ierr;
1125 
1126   PetscFunctionBegin;
1127 #if defined(PETSC_USE_LOG)
1128   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1129 #endif
1130   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1131   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1132   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1133   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1134 #if defined(PETSC_USE_CTABLE)
1135   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1136 #else
1137   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1138 #endif
1139   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1140   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1141   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1142   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1143   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1144   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1145 
1146   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1147   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1148   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1149   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1150   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1151   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1152   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1153   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1154 #if defined(PETSC_HAVE_ELEMENTAL)
1155   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1156 #endif
1157 #if defined(PETSC_HAVE_HYPRE)
1158   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1159 #endif
1160   PetscFunctionReturn(0);
1161 }
1162 
1163 #undef __FUNCT__
1164 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1165 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1166 {
1167   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1168   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1169   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1170   PetscErrorCode ierr;
1171   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1172   int            fd;
1173   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1174   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1175   PetscScalar    *column_values;
1176   PetscInt       message_count,flowcontrolcount;
1177   FILE           *file;
1178 
1179   PetscFunctionBegin;
1180   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1181   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1182   nz   = A->nz + B->nz;
1183   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1184   if (!rank) {
1185     header[0] = MAT_FILE_CLASSID;
1186     header[1] = mat->rmap->N;
1187     header[2] = mat->cmap->N;
1188 
1189     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1190     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1191     /* get largest number of rows any processor has */
1192     rlen  = mat->rmap->n;
1193     range = mat->rmap->range;
1194     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1195   } else {
1196     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1197     rlen = mat->rmap->n;
1198   }
1199 
1200   /* load up the local row counts */
1201   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1202   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1203 
1204   /* store the row lengths to the file */
1205   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1206   if (!rank) {
1207     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1208     for (i=1; i<size; i++) {
1209       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1210       rlen = range[i+1] - range[i];
1211       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1212       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1213     }
1214     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1215   } else {
1216     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1217     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1218     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1219   }
1220   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1221 
1222   /* load up the local column indices */
1223   nzmax = nz; /* process 0 needs as much space as the largest amount any process needs */
1224   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1225   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1226   cnt   = 0;
1227   for (i=0; i<mat->rmap->n; i++) {
1228     for (j=B->i[i]; j<B->i[i+1]; j++) {
1229       if ((col = garray[B->j[j]]) > cstart) break;
1230       column_indices[cnt++] = col;
1231     }
1232     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1233     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1234   }
1235   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1236 
1237   /* store the column indices to the file */
1238   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1239   if (!rank) {
1240     MPI_Status status;
1241     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1242     for (i=1; i<size; i++) {
1243       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1244       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1245       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1246       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1247       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1248     }
1249     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1250   } else {
1251     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1252     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1253     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1254     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1255   }
1256   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1257 
1258   /* load up the local column values */
1259   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1260   cnt  = 0;
1261   for (i=0; i<mat->rmap->n; i++) {
1262     for (j=B->i[i]; j<B->i[i+1]; j++) {
1263       if (garray[B->j[j]] > cstart) break;
1264       column_values[cnt++] = B->a[j];
1265     }
1266     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1267     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1268   }
1269   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1270 
1271   /* store the column values to the file */
1272   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1273   if (!rank) {
1274     MPI_Status status;
1275     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1276     for (i=1; i<size; i++) {
1277       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1278       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1279       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1280       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1281       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1282     }
1283     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1284   } else {
1285     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1286     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1287     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1288     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1289   }
1290   ierr = PetscFree(column_values);CHKERRQ(ierr);
1291 
1292   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1293   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1294   PetscFunctionReturn(0);
1295 }
1296 
1297 #include <petscdraw.h>
1298 #undef __FUNCT__
1299 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1300 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1301 {
1302   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1303   PetscErrorCode    ierr;
1304   PetscMPIInt       rank = aij->rank,size = aij->size;
1305   PetscBool         isdraw,iascii,isbinary;
1306   PetscViewer       sviewer;
1307   PetscViewerFormat format;
1308 
1309   PetscFunctionBegin;
1310   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1311   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1312   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1313   if (iascii) {
1314     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1315     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1316       MatInfo   info;
1317       PetscBool inodes;
1318 
1319       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1320       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1321       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1322       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1323       if (!inodes) {
1324         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1325                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1326       } else {
1327         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1328                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1329       }
1330       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1331       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1332       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1333       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1334       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1335       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1336       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1337       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1338       PetscFunctionReturn(0);
1339     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1340       PetscInt inodecount,inodelimit,*inodes;
1341       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1342       if (inodes) {
1343         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1344       } else {
1345         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1346       }
1347       PetscFunctionReturn(0);
1348     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1349       PetscFunctionReturn(0);
1350     }
1351   } else if (isbinary) {
1352     if (size == 1) {
1353       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1354       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1355     } else {
1356       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1357     }
1358     PetscFunctionReturn(0);
1359   } else if (isdraw) {
1360     PetscDraw draw;
1361     PetscBool isnull;
1362     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1363     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1364     if (isnull) PetscFunctionReturn(0);
1365   }
1366 
1367   {
1368     /* assemble the entire matrix onto the first processor */
1369     Mat        A;
1370     Mat_SeqAIJ *Aloc;
1371     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1372     MatScalar  *a;
1373 
1374     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1375     if (!rank) {
1376       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1377     } else {
1378       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1379     }
1380     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1381     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1382     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1383     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1384     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1385 
1386     /* copy over the A part */
1387     Aloc = (Mat_SeqAIJ*)aij->A->data;
1388     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1389     row  = mat->rmap->rstart;
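    /* temporarily shift the diagonal-block column indices to global numbering for MatSetValues(); they are shifted back below */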
1390     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1391     for (i=0; i<m; i++) {
1392       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1393       row++;
1394       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1395     }
1396     aj = Aloc->j;
1397     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1398 
1399     /* copy over the B part */
1400     Aloc = (Mat_SeqAIJ*)aij->B->data;
1401     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1402     row  = mat->rmap->rstart;
1403     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1404     ct   = cols;
1405     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1406     for (i=0; i<m; i++) {
1407       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1408       row++;
1409       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1410     }
1411     ierr = PetscFree(ct);CHKERRQ(ierr);
1412     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1413     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1414     /*
1415        Every process has to make this call since the graphics waits are
1416        synchronized across all processes that share the PetscDraw object
1417     */
1418     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1419     if (!rank) {
1420       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1421       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1422     }
1423     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1424     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1425     ierr = MatDestroy(&A);CHKERRQ(ierr);
1426   }
1427   PetscFunctionReturn(0);
1428 }
1429 
1430 #undef __FUNCT__
1431 #define __FUNCT__ "MatView_MPIAIJ"
1432 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1433 {
1434   PetscErrorCode ierr;
1435   PetscBool      iascii,isdraw,issocket,isbinary;
1436 
1437   PetscFunctionBegin;
1438   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1439   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1440   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1441   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1442   if (iascii || isdraw || isbinary || issocket) {
1443     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1444   }
1445   PetscFunctionReturn(0);
1446 }
1447 
1448 #undef __FUNCT__
1449 #define __FUNCT__ "MatSOR_MPIAIJ"
1450 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1451 {
1452   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1453   PetscErrorCode ierr;
1454   Vec            bb1 = 0;
1455   PetscBool      hasop;
1456 
1457   PetscFunctionBegin;
1458   if (flag == SOR_APPLY_UPPER) {
1459     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1460     PetscFunctionReturn(0);
1461   }
1462 
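  /* a work copy bb1 of the right-hand side is needed whenever more than one outer iteration is run,
     the initial guess may be nonzero, or Eisenstat's trick is used; note that ~flag & SOR_ZERO_INITIAL_GUESS
     is nonzero exactly when the zero-initial-guess bit is NOT set */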
1463   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1464     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1465   }
1466 
1467   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1468     if (flag & SOR_ZERO_INITIAL_GUESS) {
1469       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1470       its--;
1471     }
1472 
1473     while (its--) {
1474       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1475       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1476 
1477       /* update rhs: bb1 = bb - B*x */
1478       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1479       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1480 
1481       /* local sweep */
1482       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1483     }
1484   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1485     if (flag & SOR_ZERO_INITIAL_GUESS) {
1486       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1487       its--;
1488     }
1489     while (its--) {
1490       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1491       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1492 
1493       /* update rhs: bb1 = bb - B*x */
1494       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1495       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1496 
1497       /* local sweep */
1498       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1499     }
1500   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1501     if (flag & SOR_ZERO_INITIAL_GUESS) {
1502       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1503       its--;
1504     }
1505     while (its--) {
1506       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1507       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1508 
1509       /* update rhs: bb1 = bb - B*x */
1510       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1511       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1512 
1513       /* local sweep */
1514       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1515     }
1516   } else if (flag & SOR_EISENSTAT) {
1517     Vec xx1;
1518 
1519     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1520     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1521 
1522     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1523     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1524     if (!mat->diag) {
1525       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1526       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1527     }
1528     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1529     if (hasop) {
1530       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1531     } else {
1532       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1533     }
1534     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1535 
1536     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1537 
1538     /* local sweep */
1539     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1540     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1541     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1542   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1543 
1544   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1545 
1546   matin->factorerrortype = mat->A->factorerrortype;
1547   PetscFunctionReturn(0);
1548 }
1549 
1550 #undef __FUNCT__
1551 #define __FUNCT__ "MatPermute_MPIAIJ"
1552 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1553 {
1554   Mat            aA,aB,Aperm;
1555   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1556   PetscScalar    *aa,*ba;
1557   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1558   PetscSF        rowsf,sf;
1559   IS             parcolp = NULL;
1560   PetscBool      done;
1561   PetscErrorCode ierr;
1562 
1563   PetscFunctionBegin;
1564   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1565   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1566   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1567   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1568 
1569   /* Invert row permutation to find out where my rows should go */
1570   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1571   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1572   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1573   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1574   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1575   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1576 
1577   /* Invert column permutation to find out where my columns should go */
1578   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1579   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1580   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1581   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1582   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1583   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1584   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1585 
1586   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1587   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1588   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1589 
1590   /* Find out where my gcols should go */
1591   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1592   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1593   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1594   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1595   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1596   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1597   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1598   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1599 
1600   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1601   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1602   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1603   for (i=0; i<m; i++) {
1604     PetscInt row = rdest[i],rowner;
1605     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1606     for (j=ai[i]; j<ai[i+1]; j++) {
1607       PetscInt cowner,col = cdest[aj[j]];
1608       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1609       if (rowner == cowner) dnnz[i]++;
1610       else onnz[i]++;
1611     }
1612     for (j=bi[i]; j<bi[i+1]; j++) {
1613       PetscInt cowner,col = gcdest[bj[j]];
1614       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1615       if (rowner == cowner) dnnz[i]++;
1616       else onnz[i]++;
1617     }
1618   }
1619   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1620   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1621   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1622   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1623   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1624 
1625   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1626   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1627   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1628   for (i=0; i<m; i++) {
1629     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1630     PetscInt j0,rowlen;
1631     rowlen = ai[i+1] - ai[i];
1632     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m, the length of the reused acols/bcols buffers, so set the values in batches of at most m */
1633       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1634       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1635     }
1636     rowlen = bi[i+1] - bi[i];
1637     for (j0=j=0; j<rowlen; j0=j) {
1638       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1639       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1640     }
1641   }
1642   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1643   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1644   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1645   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1646   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1647   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1648   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1649   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1650   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1651   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1652   *B = Aperm;
1653   PetscFunctionReturn(0);
1654 }
1655 
1656 #undef __FUNCT__
1657 #define __FUNCT__ "MatGetGhosts_MPIAIJ"
1658 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1659 {
1660   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1661   PetscErrorCode ierr;
1662 
1663   PetscFunctionBegin;
1664   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1665   if (ghosts) *ghosts = aij->garray;
1666   PetscFunctionReturn(0);
1667 }
1668 
1669 #undef __FUNCT__
1670 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1671 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1672 {
1673   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1674   Mat            A    = mat->A,B = mat->B;
1675   PetscErrorCode ierr;
1676   PetscReal      isend[5],irecv[5];
1677 
1678   PetscFunctionBegin;
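  /* accumulate the local statistics of the diagonal (A) and off-diagonal (B) blocks, then combine them according to flag */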
1679   info->block_size = 1.0;
1680   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1681 
1682   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1683   isend[3] = info->memory;  isend[4] = info->mallocs;
1684 
1685   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1686 
1687   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1688   isend[3] += info->memory;  isend[4] += info->mallocs;
1689   if (flag == MAT_LOCAL) {
1690     info->nz_used      = isend[0];
1691     info->nz_allocated = isend[1];
1692     info->nz_unneeded  = isend[2];
1693     info->memory       = isend[3];
1694     info->mallocs      = isend[4];
1695   } else if (flag == MAT_GLOBAL_MAX) {
1696     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1697 
1698     info->nz_used      = irecv[0];
1699     info->nz_allocated = irecv[1];
1700     info->nz_unneeded  = irecv[2];
1701     info->memory       = irecv[3];
1702     info->mallocs      = irecv[4];
1703   } else if (flag == MAT_GLOBAL_SUM) {
1704     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1705 
1706     info->nz_used      = irecv[0];
1707     info->nz_allocated = irecv[1];
1708     info->nz_unneeded  = irecv[2];
1709     info->memory       = irecv[3];
1710     info->mallocs      = irecv[4];
1711   }
1712   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1713   info->fill_ratio_needed = 0;
1714   info->factor_mallocs    = 0;
1715   PetscFunctionReturn(0);
1716 }
1717 
1718 #undef __FUNCT__
1719 #define __FUNCT__ "MatSetOption_MPIAIJ"
1720 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1721 {
1722   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1723   PetscErrorCode ierr;
1724 
1725   PetscFunctionBegin;
1726   switch (op) {
1727   case MAT_NEW_NONZERO_LOCATIONS:
1728   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1729   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1730   case MAT_KEEP_NONZERO_PATTERN:
1731   case MAT_NEW_NONZERO_LOCATION_ERR:
1732   case MAT_USE_INODES:
1733   case MAT_IGNORE_ZERO_ENTRIES:
1734     MatCheckPreallocated(A,1);
1735     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1736     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1737     break;
1738   case MAT_ROW_ORIENTED:
1739     MatCheckPreallocated(A,1);
1740     a->roworiented = flg;
1741 
1742     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1743     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1744     break;
1745   case MAT_NEW_DIAGONALS:
1746     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1747     break;
1748   case MAT_IGNORE_OFF_PROC_ENTRIES:
1749     a->donotstash = flg;
1750     break;
1751   case MAT_SPD:
1752     A->spd_set = PETSC_TRUE;
1753     A->spd     = flg;
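    /* marking the matrix SPD also records it as symmetric and structurally symmetric */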
1754     if (flg) {
1755       A->symmetric                  = PETSC_TRUE;
1756       A->structurally_symmetric     = PETSC_TRUE;
1757       A->symmetric_set              = PETSC_TRUE;
1758       A->structurally_symmetric_set = PETSC_TRUE;
1759     }
1760     break;
1761   case MAT_SYMMETRIC:
1762     MatCheckPreallocated(A,1);
1763     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1764     break;
1765   case MAT_STRUCTURALLY_SYMMETRIC:
1766     MatCheckPreallocated(A,1);
1767     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1768     break;
1769   case MAT_HERMITIAN:
1770     MatCheckPreallocated(A,1);
1771     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1772     break;
1773   case MAT_SYMMETRY_ETERNAL:
1774     MatCheckPreallocated(A,1);
1775     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1776     break;
1777   case MAT_SUBMAT_SINGLEIS:
1778     A->submat_singleis = flg;
1779     break;
1780   default:
1781     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1782   }
1783   PetscFunctionReturn(0);
1784 }
1785 
1786 #undef __FUNCT__
1787 #define __FUNCT__ "MatGetRow_MPIAIJ"
1788 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1789 {
1790   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1791   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1792   PetscErrorCode ierr;
1793   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1794   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1795   PetscInt       *cmap,*idx_p;
1796 
1797   PetscFunctionBegin;
1798   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1799   mat->getrowactive = PETSC_TRUE;
1800 
1801   if (!mat->rowvalues && (idx || v)) {
1802     /*
1803         allocate enough space to hold information from the longest row.
1804     */
1805     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1806     PetscInt   max = 1,tmp;
1807     for (i=0; i<matin->rmap->n; i++) {
1808       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1809       if (max < tmp) max = tmp;
1810     }
1811     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1812   }
1813 
1814   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1815   lrow = row - rstart;
1816 
1817   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1818   if (!v)   {pvA = 0; pvB = 0;}
1819   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1820   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1821   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1822   nztot = nzA + nzB;
1823 
1824   cmap = mat->garray;
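  /* the row is assembled from three already-sorted pieces: off-diagonal (B) entries whose global column lies
     to the left of the diagonal block, the diagonal (A) entries, and the remaining off-diagonal entries */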
1825   if (v  || idx) {
1826     if (nztot) {
1827       /* Sort by increasing column numbers, assuming A and B already sorted */
1828       PetscInt imark = -1;
1829       if (v) {
1830         *v = v_p = mat->rowvalues;
1831         for (i=0; i<nzB; i++) {
1832           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1833           else break;
1834         }
1835         imark = i;
1836         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1837         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1838       }
1839       if (idx) {
1840         *idx = idx_p = mat->rowindices;
1841         if (imark > -1) {
1842           for (i=0; i<imark; i++) {
1843             idx_p[i] = cmap[cworkB[i]];
1844           }
1845         } else {
1846           for (i=0; i<nzB; i++) {
1847             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1848             else break;
1849           }
1850           imark = i;
1851         }
1852         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1853         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1854       }
1855     } else {
1856       if (idx) *idx = 0;
1857       if (v)   *v   = 0;
1858     }
1859   }
1860   *nz  = nztot;
1861   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1862   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1863   PetscFunctionReturn(0);
1864 }
1865 
1866 #undef __FUNCT__
1867 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1868 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1869 {
1870   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1871 
1872   PetscFunctionBegin;
1873   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1874   aij->getrowactive = PETSC_FALSE;
1875   PetscFunctionReturn(0);
1876 }
1877 
1878 #undef __FUNCT__
1879 #define __FUNCT__ "MatNorm_MPIAIJ"
1880 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1881 {
1882   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1883   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1884   PetscErrorCode ierr;
1885   PetscInt       i,j,cstart = mat->cmap->rstart;
1886   PetscReal      sum = 0.0;
1887   MatScalar      *v;
1888 
1889   PetscFunctionBegin;
1890   if (aij->size == 1) {
1891     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1892   } else {
1893     if (type == NORM_FROBENIUS) {
1894       v = amat->a;
1895       for (i=0; i<amat->nz; i++) {
1896         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1897       }
1898       v = bmat->a;
1899       for (i=0; i<bmat->nz; i++) {
1900         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1901       }
1902       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1903       *norm = PetscSqrtReal(*norm);
1904       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1905     } else if (type == NORM_1) { /* max column norm */
1906       PetscReal *tmp,*tmp2;
1907       PetscInt  *jj,*garray = aij->garray;
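      /* the column sums are accumulated in work arrays of global length cmap->N on every process,
         which is simple but not memory-scalable for matrices with very many columns */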
1908       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1909       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1910       *norm = 0.0;
1911       v     = amat->a; jj = amat->j;
1912       for (j=0; j<amat->nz; j++) {
1913         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1914       }
1915       v = bmat->a; jj = bmat->j;
1916       for (j=0; j<bmat->nz; j++) {
1917         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1918       }
1919       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1920       for (j=0; j<mat->cmap->N; j++) {
1921         if (tmp2[j] > *norm) *norm = tmp2[j];
1922       }
1923       ierr = PetscFree(tmp);CHKERRQ(ierr);
1924       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1925       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1926     } else if (type == NORM_INFINITY) { /* max row norm */
1927       PetscReal ntemp = 0.0;
1928       for (j=0; j<aij->A->rmap->n; j++) {
1929         v   = amat->a + amat->i[j];
1930         sum = 0.0;
1931         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1932           sum += PetscAbsScalar(*v); v++;
1933         }
1934         v = bmat->a + bmat->i[j];
1935         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1936           sum += PetscAbsScalar(*v); v++;
1937         }
1938         if (sum > ntemp) ntemp = sum;
1939       }
1940       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1941       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1942     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1943   }
1944   PetscFunctionReturn(0);
1945 }
1946 
1947 #undef __FUNCT__
1948 #define __FUNCT__ "MatTranspose_MPIAIJ"
1949 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1950 {
1951   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1952   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1953   PetscErrorCode ierr;
1954   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1955   PetscInt       cstart = A->cmap->rstart,ncol;
1956   Mat            B;
1957   MatScalar      *array;
1958 
1959   PetscFunctionBegin;
1960   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1961 
1962   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1963   ai = Aloc->i; aj = Aloc->j;
1964   bi = Bloc->i; bj = Bloc->j;
1965   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1966     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1967     PetscSFNode          *oloc;
1968     PETSC_UNUSED PetscSF sf;
1969 
1970     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1971     /* compute d_nnz for preallocation */
1972     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1973     for (i=0; i<ai[ma]; i++) {
1974       d_nnz[aj[i]]++;
1975       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1976     }
1977     /* compute local off-diagonal contributions */
1978     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1979     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1980     /* map those to global */
1981     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1982     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1983     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1984     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1985     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1986     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1987     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1988 
1989     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1990     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1991     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1992     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1993     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1994     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1995   } else {
1996     B    = *matout;
1997     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1998     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1999   }
2000 
2001   /* copy over the A part */
2002   array = Aloc->a;
2003   row   = A->rmap->rstart;
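  /* local row i of A (global row 'row') becomes column 'row' of the transpose: MatSetValues() is called
     with ncol row indices (the former global column indices in aj) and a single column index */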
2004   for (i=0; i<ma; i++) {
2005     ncol = ai[i+1]-ai[i];
2006     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2007     row++;
2008     array += ncol; aj += ncol;
2009   }
2010   aj = Aloc->j;
2011   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
2012 
2013   /* copy over the B part */
2014   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2015   array = Bloc->a;
2016   row   = A->rmap->rstart;
2017   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2018   cols_tmp = cols;
2019   for (i=0; i<mb; i++) {
2020     ncol = bi[i+1]-bi[i];
2021     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2022     row++;
2023     array += ncol; cols_tmp += ncol;
2024   }
2025   ierr = PetscFree(cols);CHKERRQ(ierr);
2026 
2027   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2028   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2029   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2030     *matout = B;
2031   } else {
2032     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2033   }
2034   PetscFunctionReturn(0);
2035 }
2036 
2037 #undef __FUNCT__
2038 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2039 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2040 {
2041   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2042   Mat            a    = aij->A,b = aij->B;
2043   PetscErrorCode ierr;
2044   PetscInt       s1,s2,s3;
2045 
2046   PetscFunctionBegin;
2047   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2048   if (rr) {
2049     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2050     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2051     /* Overlap communication with computation. */
2052     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2053   }
2054   if (ll) {
2055     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2056     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2057     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2058   }
2059   /* scale the diagonal block */
2060   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2061 
2062   if (rr) {
2063     /* Do a scatter end and then right scale the off-diagonal block */
2064     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2065     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2066   }
2067   PetscFunctionReturn(0);
2068 }
2069 
2070 #undef __FUNCT__
2071 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2072 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2073 {
2074   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2075   PetscErrorCode ierr;
2076 
2077   PetscFunctionBegin;
2078   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2079   PetscFunctionReturn(0);
2080 }
2081 
2082 #undef __FUNCT__
2083 #define __FUNCT__ "MatEqual_MPIAIJ"
2084 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2085 {
2086   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2087   Mat            a,b,c,d;
2088   PetscBool      flg;
2089   PetscErrorCode ierr;
2090 
2091   PetscFunctionBegin;
2092   a = matA->A; b = matA->B;
2093   c = matB->A; d = matB->B;
2094 
2095   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2096   if (flg) {
2097     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2098   }
2099   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2100   PetscFunctionReturn(0);
2101 }
2102 
2103 #undef __FUNCT__
2104 #define __FUNCT__ "MatCopy_MPIAIJ"
2105 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2106 {
2107   PetscErrorCode ierr;
2108   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2109   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2110 
2111   PetscFunctionBegin;
2112   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2113   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2114     /* because of the column compression in the off-processor part of the matrix a->B,
2115        the number of columns in a->B and b->B may differ, hence we cannot call
2116        MatCopy() directly on the two parts. If need be, a copy more efficient than
2117        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2118        and then copying the submatrices */
2119     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2120   } else {
2121     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2122     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2123   }
2124   PetscFunctionReturn(0);
2125 }
2126 
2127 #undef __FUNCT__
2128 #define __FUNCT__ "MatSetUp_MPIAIJ"
2129 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2130 {
2131   PetscErrorCode ierr;
2132 
2133   PetscFunctionBegin;
2134   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2135   PetscFunctionReturn(0);
2136 }
2137 
2138 /*
2139    Computes the number of nonzeros per row needed for preallocation when X and Y
2140    have different nonzero structure.
2141 */
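/*
   An illustrative example (hypothetical data, not taken from the code): if for one row the X part has
   global columns {1,4,7} and the Y part has global columns {2,4,9}, the merge below visits the union
   {1,2,4,7,9} and counts the shared column 4 only once, so nnz[i] = 5 for that row.
*/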
2142 #undef __FUNCT__
2143 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2144 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2145 {
2146   PetscInt       i,j,k,nzx,nzy;
2147 
2148   PetscFunctionBegin;
2149   /* Set the number of nonzeros in the new matrix */
2150   for (i=0; i<m; i++) {
2151     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2152     nzx = xi[i+1] - xi[i];
2153     nzy = yi[i+1] - yi[i];
2154     nnz[i] = 0;
2155     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2156       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2157       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2158       nnz[i]++;
2159     }
2160     for (; k<nzy; k++) nnz[i]++;
2161   }
2162   PetscFunctionReturn(0);
2163 }
2164 
2165 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2166 #undef __FUNCT__
2167 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2168 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2169 {
2170   PetscErrorCode ierr;
2171   PetscInt       m = Y->rmap->N;
2172   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2173   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2174 
2175   PetscFunctionBegin;
2176   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2177   PetscFunctionReturn(0);
2178 }
2179 
2180 #undef __FUNCT__
2181 #define __FUNCT__ "MatAXPY_MPIAIJ"
2182 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2183 {
2184   PetscErrorCode ierr;
2185   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2186   PetscBLASInt   bnz,one=1;
2187   Mat_SeqAIJ     *x,*y;
2188 
2189   PetscFunctionBegin;
2190   if (str == SAME_NONZERO_PATTERN) {
2191     PetscScalar alpha = a;
2192     x    = (Mat_SeqAIJ*)xx->A->data;
2193     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2194     y    = (Mat_SeqAIJ*)yy->A->data;
2195     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2196     x    = (Mat_SeqAIJ*)xx->B->data;
2197     y    = (Mat_SeqAIJ*)yy->B->data;
2198     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2199     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2200     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2201   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */
2202     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2203   } else {
2204     Mat      B;
2205     PetscInt *nnz_d,*nnz_o;
2206     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2207     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2208     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2209     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2210     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2211     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2212     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2213     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2214     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2215     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2216     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2217     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2218     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2219     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2220   }
2221   PetscFunctionReturn(0);
2222 }
2223 
2224 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2225 
2226 #undef __FUNCT__
2227 #define __FUNCT__ "MatConjugate_MPIAIJ"
2228 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2229 {
2230 #if defined(PETSC_USE_COMPLEX)
2231   PetscErrorCode ierr;
2232   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2233 
2234   PetscFunctionBegin;
2235   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2236   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2237 #else
2238   PetscFunctionBegin;
2239 #endif
2240   PetscFunctionReturn(0);
2241 }
2242 
2243 #undef __FUNCT__
2244 #define __FUNCT__ "MatRealPart_MPIAIJ"
2245 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2246 {
2247   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2248   PetscErrorCode ierr;
2249 
2250   PetscFunctionBegin;
2251   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2252   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2253   PetscFunctionReturn(0);
2254 }
2255 
2256 #undef __FUNCT__
2257 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2258 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2259 {
2260   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2261   PetscErrorCode ierr;
2262 
2263   PetscFunctionBegin;
2264   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2265   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2266   PetscFunctionReturn(0);
2267 }
2268 
2269 #undef __FUNCT__
2270 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2271 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2272 {
2273   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2274   PetscErrorCode ierr;
2275   PetscInt       i,*idxb = 0;
2276   PetscScalar    *va,*vb;
2277   Vec            vtmp;
2278 
2279   PetscFunctionBegin;
2280   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2281   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2282   if (idx) {
2283     for (i=0; i<A->rmap->n; i++) {
2284       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2285     }
2286   }
2287 
2288   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2289   if (idx) {
2290     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2291   }
2292   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2293   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2294 
2295   for (i=0; i<A->rmap->n; i++) {
2296     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2297       va[i] = vb[i];
2298       if (idx) idx[i] = a->garray[idxb[i]];
2299     }
2300   }
2301 
2302   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2303   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2304   ierr = PetscFree(idxb);CHKERRQ(ierr);
2305   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2306   PetscFunctionReturn(0);
2307 }
2308 
2309 #undef __FUNCT__
2310 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2311 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2312 {
2313   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2314   PetscErrorCode ierr;
2315   PetscInt       i,*idxb = 0;
2316   PetscScalar    *va,*vb;
2317   Vec            vtmp;
2318 
2319   PetscFunctionBegin;
2320   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2321   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2322   if (idx) {
2323     for (i=0; i<A->rmap->n; i++) {
2324       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2325     }
2326   }
2327 
2328   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2329   if (idx) {
2330     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2331   }
2332   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2333   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2334 
2335   for (i=0; i<A->rmap->n; i++) {
2336     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2337       va[i] = vb[i];
2338       if (idx) idx[i] = a->garray[idxb[i]];
2339     }
2340   }
2341 
2342   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2343   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2344   ierr = PetscFree(idxb);CHKERRQ(ierr);
2345   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2346   PetscFunctionReturn(0);
2347 }
2348 
2349 #undef __FUNCT__
2350 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2351 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2352 {
2353   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2354   PetscInt       n      = A->rmap->n;
2355   PetscInt       cstart = A->cmap->rstart;
2356   PetscInt       *cmap  = mat->garray;
2357   PetscInt       *diagIdx, *offdiagIdx;
2358   Vec            diagV, offdiagV;
2359   PetscScalar    *a, *diagA, *offdiagA;
2360   PetscInt       r;
2361   PetscErrorCode ierr;
2362 
2363   PetscFunctionBegin;
2364   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2365   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2366   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2367   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2368   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2369   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2370   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2371   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2372   for (r = 0; r < n; ++r) {
2373     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2374       a[r]   = diagA[r];
2375       idx[r] = cstart + diagIdx[r];
2376     } else {
2377       a[r]   = offdiagA[r];
2378       idx[r] = cmap[offdiagIdx[r]];
2379     }
2380   }
2381   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2382   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2383   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2384   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2385   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2386   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2387   PetscFunctionReturn(0);
2388 }
2389 
2390 #undef __FUNCT__
2391 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2392 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2393 {
2394   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2395   PetscInt       n      = A->rmap->n;
2396   PetscInt       cstart = A->cmap->rstart;
2397   PetscInt       *cmap  = mat->garray;
2398   PetscInt       *diagIdx, *offdiagIdx;
2399   Vec            diagV, offdiagV;
2400   PetscScalar    *a, *diagA, *offdiagA;
2401   PetscInt       r;
2402   PetscErrorCode ierr;
2403 
2404   PetscFunctionBegin;
2405   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2406   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2407   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2408   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2409   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2410   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2411   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2412   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2413   for (r = 0; r < n; ++r) {
2414     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2415       a[r]   = diagA[r];
2416       idx[r] = cstart + diagIdx[r];
2417     } else {
2418       a[r]   = offdiagA[r];
2419       idx[r] = cmap[offdiagIdx[r]];
2420     }
2421   }
2422   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2423   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2424   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2425   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2426   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2427   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2428   PetscFunctionReturn(0);
2429 }
2430 
2431 #undef __FUNCT__
2432 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2433 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2434 {
2435   PetscErrorCode ierr;
2436   Mat            *dummy;
2437 
2438   PetscFunctionBegin;
2439   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2440   *newmat = *dummy;
2441   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2442   PetscFunctionReturn(0);
2443 }
2444 
2445 #undef __FUNCT__
2446 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2447 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2448 {
2449   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2450   PetscErrorCode ierr;
2451 
2452   PetscFunctionBegin;
2453   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2454   A->factorerrortype = a->A->factorerrortype;
2455   PetscFunctionReturn(0);
2456 }
2457 
2458 #undef __FUNCT__
2459 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2460 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2461 {
2462   PetscErrorCode ierr;
2463   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2464 
2465   PetscFunctionBegin;
2466   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2467   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2468   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2469   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2470   PetscFunctionReturn(0);
2471 }
2472 
2473 #undef __FUNCT__
2474 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ"
2475 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2476 {
2477   PetscFunctionBegin;
2478   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2479   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2480   PetscFunctionReturn(0);
2481 }
2482 
2483 #undef __FUNCT__
2484 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap"
2485 /*@
2486    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2487 
2488    Collective on Mat
2489 
2490    Input Parameters:
2491 +    A - the matrix
2492 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2493 
2494  Level: advanced
2495 
2496 @*/
2497 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2498 {
2499   PetscErrorCode       ierr;
2500 
2501   PetscFunctionBegin;
2502   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2503   PetscFunctionReturn(0);
2504 }
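/*
   A minimal usage sketch (assumes A is an assembled MATMPIAIJ matrix and that the index sets is[],
   their number nis, and the overlap ov are supplied by the caller):

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatIncreaseOverlap(A,nis,is,ov);CHKERRQ(ierr);

   The same choice can be made from the options database with -mat_increase_overlap_scalable.
*/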
2505 
2506 #undef __FUNCT__
2507 #define __FUNCT__ "MatSetFromOptions_MPIAIJ"
2508 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2509 {
2510   PetscErrorCode       ierr;
2511   PetscBool            sc = PETSC_FALSE,flg;
2512 
2513   PetscFunctionBegin;
2514   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2515   ierr = PetscObjectOptionsBegin((PetscObject)A);
2516     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2517     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2518     if (flg) {
2519       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2520     }
2521   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2522   PetscFunctionReturn(0);
2523 }
2524 
2525 #undef __FUNCT__
2526 #define __FUNCT__ "MatShift_MPIAIJ"
2527 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2528 {
2529   PetscErrorCode ierr;
2530   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2531   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2532 
2533   PetscFunctionBegin;
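  /* make sure every local row of the diagonal block can hold at least one entry for the shift;
     when re-preallocating an empty diagonal block, preserve its nonew setting */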
2534   if (!Y->preallocated) {
2535     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2536   } else if (!aij->nz) {
2537     PetscInt nonew = aij->nonew;
2538     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2539     aij->nonew = nonew;
2540   }
2541   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2542   PetscFunctionReturn(0);
2543 }
2544 
2545 #undef __FUNCT__
2546 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ"
2547 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2548 {
2549   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2550   PetscErrorCode ierr;
2551 
2552   PetscFunctionBegin;
2553   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2554   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2555   if (d) {
2556     PetscInt rstart;
2557     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2558     *d += rstart;
2559 
2560   }
2561   PetscFunctionReturn(0);
2562 }
2563 
2564 
2565 /* -------------------------------------------------------------------*/
2566 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2567                                        MatGetRow_MPIAIJ,
2568                                        MatRestoreRow_MPIAIJ,
2569                                        MatMult_MPIAIJ,
2570                                 /* 4*/ MatMultAdd_MPIAIJ,
2571                                        MatMultTranspose_MPIAIJ,
2572                                        MatMultTransposeAdd_MPIAIJ,
2573                                        0,
2574                                        0,
2575                                        0,
2576                                 /*10*/ 0,
2577                                        0,
2578                                        0,
2579                                        MatSOR_MPIAIJ,
2580                                        MatTranspose_MPIAIJ,
2581                                 /*15*/ MatGetInfo_MPIAIJ,
2582                                        MatEqual_MPIAIJ,
2583                                        MatGetDiagonal_MPIAIJ,
2584                                        MatDiagonalScale_MPIAIJ,
2585                                        MatNorm_MPIAIJ,
2586                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2587                                        MatAssemblyEnd_MPIAIJ,
2588                                        MatSetOption_MPIAIJ,
2589                                        MatZeroEntries_MPIAIJ,
2590                                 /*24*/ MatZeroRows_MPIAIJ,
2591                                        0,
2592                                        0,
2593                                        0,
2594                                        0,
2595                                 /*29*/ MatSetUp_MPIAIJ,
2596                                        0,
2597                                        0,
2598                                        MatGetDiagonalBlock_MPIAIJ,
2599                                        0,
2600                                 /*34*/ MatDuplicate_MPIAIJ,
2601                                        0,
2602                                        0,
2603                                        0,
2604                                        0,
2605                                 /*39*/ MatAXPY_MPIAIJ,
2606                                        MatGetSubMatrices_MPIAIJ,
2607                                        MatIncreaseOverlap_MPIAIJ,
2608                                        MatGetValues_MPIAIJ,
2609                                        MatCopy_MPIAIJ,
2610                                 /*44*/ MatGetRowMax_MPIAIJ,
2611                                        MatScale_MPIAIJ,
2612                                        MatShift_MPIAIJ,
2613                                        MatDiagonalSet_MPIAIJ,
2614                                        MatZeroRowsColumns_MPIAIJ,
2615                                 /*49*/ MatSetRandom_MPIAIJ,
2616                                        0,
2617                                        0,
2618                                        0,
2619                                        0,
2620                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2621                                        0,
2622                                        MatSetUnfactored_MPIAIJ,
2623                                        MatPermute_MPIAIJ,
2624                                        0,
2625                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2626                                        MatDestroy_MPIAIJ,
2627                                        MatView_MPIAIJ,
2628                                        0,
2629                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2630                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2631                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2632                                        0,
2633                                        0,
2634                                        0,
2635                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2636                                        MatGetRowMinAbs_MPIAIJ,
2637                                        0,
2638                                        0,
2639                                        0,
2640                                        0,
2641                                 /*75*/ MatFDColoringApply_AIJ,
2642                                        MatSetFromOptions_MPIAIJ,
2643                                        0,
2644                                        0,
2645                                        MatFindZeroDiagonals_MPIAIJ,
2646                                 /*80*/ 0,
2647                                        0,
2648                                        0,
2649                                 /*83*/ MatLoad_MPIAIJ,
2650                                        0,
2651                                        0,
2652                                        0,
2653                                        0,
2654                                        0,
2655                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2656                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2657                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2658                                        MatPtAP_MPIAIJ_MPIAIJ,
2659                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2660                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2661                                        0,
2662                                        0,
2663                                        0,
2664                                        0,
2665                                 /*99*/ 0,
2666                                        0,
2667                                        0,
2668                                        MatConjugate_MPIAIJ,
2669                                        0,
2670                                 /*104*/MatSetValuesRow_MPIAIJ,
2671                                        MatRealPart_MPIAIJ,
2672                                        MatImaginaryPart_MPIAIJ,
2673                                        0,
2674                                        0,
2675                                 /*109*/0,
2676                                        0,
2677                                        MatGetRowMin_MPIAIJ,
2678                                        0,
2679                                        MatMissingDiagonal_MPIAIJ,
2680                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2681                                        0,
2682                                        MatGetGhosts_MPIAIJ,
2683                                        0,
2684                                        0,
2685                                 /*119*/0,
2686                                        0,
2687                                        0,
2688                                        0,
2689                                        MatGetMultiProcBlock_MPIAIJ,
2690                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2691                                        MatGetColumnNorms_MPIAIJ,
2692                                        MatInvertBlockDiagonal_MPIAIJ,
2693                                        0,
2694                                        MatGetSubMatricesMPI_MPIAIJ,
2695                                 /*129*/0,
2696                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2697                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2698                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2699                                        0,
2700                                 /*134*/0,
2701                                        0,
2702                                        0,
2703                                        0,
2704                                        0,
2705                                 /*139*/0,
2706                                        0,
2707                                        0,
2708                                        MatFDColoringSetUp_MPIXAIJ,
2709                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2710                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2711 };
2712 
2713 /* ----------------------------------------------------------------------------------------*/
2714 
2715 #undef __FUNCT__
2716 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2717 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2718 {
2719   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2720   PetscErrorCode ierr;
2721 
2722   PetscFunctionBegin;
2723   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2724   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2725   PetscFunctionReturn(0);
2726 }
2727 
2728 #undef __FUNCT__
2729 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2730 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2731 {
2732   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2733   PetscErrorCode ierr;
2734 
2735   PetscFunctionBegin;
2736   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2737   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2738   PetscFunctionReturn(0);
2739 }
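
/*
   A minimal usage sketch (the helper name and the use of MatScale() as the stand-in
   "modification" are placeholders): the typical calling sequence for MatStoreValues()
   and MatRetrieveValues(), which the two routines above implement for the diagonal
   and off-diagonal blocks of an assembled MATMPIAIJ matrix.
*/
static PetscErrorCode ExampleStoreRetrieveValues(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr); /* freeze the nonzero pattern */
  ierr = MatStoreValues(A);CHKERRQ(ierr);    /* save a copy of the current numerical values */
  ierr = MatScale(A,2.0);CHKERRQ(ierr);      /* any value-only modification of A goes here */
  ierr = MatRetrieveValues(A);CHKERRQ(ierr); /* restore the saved values */
  PetscFunctionReturn(0);
}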
2740 
2741 #undef __FUNCT__
2742 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2743 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2744 {
2745   Mat_MPIAIJ     *b;
2746   PetscErrorCode ierr;
2747 
2748   PetscFunctionBegin;
2749   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2750   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2751   b = (Mat_MPIAIJ*)B->data;
2752 
2753 #if defined(PETSC_USE_CTABLE)
2754   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2755 #else
2756   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2757 #endif
2758   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2759   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2760   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2761 
2762   /* Because B may have been resized we simply destroy it and create a new one each time */
2763   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2764   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2765   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2766   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2767   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2768   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2769 
2770   if (!B->preallocated) {
2771     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2772     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2773     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2774     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2775     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2776   }
2777 
2778   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2779   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2780   B->preallocated  = PETSC_TRUE;
2781   B->was_assembled = PETSC_FALSE;
2782   B->assembled     = PETSC_FALSE;
2783   PetscFunctionReturn(0);
2784 }
2785 
2786 #undef __FUNCT__
2787 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2788 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2789 {
2790   Mat            mat;
2791   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2792   PetscErrorCode ierr;
2793 
2794   PetscFunctionBegin;
2795   *newmat = 0;
2796   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2797   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2798   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2799   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2800   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2801   a       = (Mat_MPIAIJ*)mat->data;
2802 
2803   mat->factortype   = matin->factortype;
2804   mat->assembled    = PETSC_TRUE;
2805   mat->insertmode   = NOT_SET_VALUES;
2806   mat->preallocated = PETSC_TRUE;
2807 
2808   a->size         = oldmat->size;
2809   a->rank         = oldmat->rank;
2810   a->donotstash   = oldmat->donotstash;
2811   a->roworiented  = oldmat->roworiented;
2812   a->rowindices   = 0;
2813   a->rowvalues    = 0;
2814   a->getrowactive = PETSC_FALSE;
2815 
2816   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2817   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2818 
2819   if (oldmat->colmap) {
2820 #if defined(PETSC_USE_CTABLE)
2821     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2822 #else
2823     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2824     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2825     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2826 #endif
2827   } else a->colmap = 0;
2828   if (oldmat->garray) {
2829     PetscInt len;
2830     len  = oldmat->B->cmap->n;
2831     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2832     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2833     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2834   } else a->garray = 0;
2835 
2836   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2837   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2838   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2839   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2840   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2841   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2842   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2843   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2844   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2845   *newmat = mat;
2846   PetscFunctionReturn(0);
2847 }
2848 
2849 
2850 
2851 #undef __FUNCT__
2852 #define __FUNCT__ "MatLoad_MPIAIJ"
2853 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2854 {
2855   PetscScalar    *vals,*svals;
2856   MPI_Comm       comm;
2857   PetscErrorCode ierr;
2858   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2859   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2860   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2861   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2862   PetscInt       cend,cstart,n,*rowners;
2863   int            fd;
2864   PetscInt       bs = newMat->rmap->bs;
2865 
2866   PetscFunctionBegin;
2867   /* force binary viewer to load .info file if it has not yet done so */
2868   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2869   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2870   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2871   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2872   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2873   if (!rank) {
2874     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2875     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2876     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2877   }
2878 
2879   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2880   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2881   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2882   if (bs < 0) bs = 1;
2883 
2884   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2885   M    = header[1]; N = header[2];
2886 
2887   /* If global sizes are set, check if they are consistent with that given in the file */
2888   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2889   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2890 
2891   /* determine ownership of all (block) rows */
2892   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
2893   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2894   else m = newMat->rmap->n; /* Set by user */
2895 
2896   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2897   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2898 
2899   /* First process needs enough room for process with most rows */
2900   if (!rank) {
2901     mmax = rowners[1];
2902     for (i=2; i<=size; i++) {
2903       mmax = PetscMax(mmax, rowners[i]);
2904     }
2905   } else mmax = -1;             /* unused, but compilers complain */
2906 
2907   rowners[0] = 0;
2908   for (i=2; i<=size; i++) {
2909     rowners[i] += rowners[i-1];
2910   }
2911   rstart = rowners[rank];
2912   rend   = rowners[rank+1];
2913 
2914   /* distribute row lengths to all processors */
2915   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2916   if (!rank) {
2917     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2918     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2919     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2920     for (j=0; j<m; j++) {
2921       procsnz[0] += ourlens[j];
2922     }
2923     for (i=1; i<size; i++) {
2924       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2925       /* calculate the number of nonzeros on each processor */
2926       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2927         procsnz[i] += rowlengths[j];
2928       }
2929       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2930     }
2931     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2932   } else {
2933     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2934   }
2935 
2936   if (!rank) {
2937     /* determine max buffer needed and allocate it */
2938     maxnz = 0;
2939     for (i=0; i<size; i++) {
2940       maxnz = PetscMax(maxnz,procsnz[i]);
2941     }
2942     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2943 
2944     /* read in my part of the matrix column indices  */
2945     nz   = procsnz[0];
2946     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2947     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2948 
2949     /* read in everyone else's part and ship it off */
2950     for (i=1; i<size; i++) {
2951       nz   = procsnz[i];
2952       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2953       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2954     }
2955     ierr = PetscFree(cols);CHKERRQ(ierr);
2956   } else {
2957     /* determine buffer space needed for message */
2958     nz = 0;
2959     for (i=0; i<m; i++) {
2960       nz += ourlens[i];
2961     }
2962     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2963 
2964     /* receive message of column indices*/
2965     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2966   }
2967 
2968   /* determine column ownership if matrix is not square */
2969   if (N != M) {
2970     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2971     else n = newMat->cmap->n;
2972     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2973     cstart = cend - n;
2974   } else {
2975     cstart = rstart;
2976     cend   = rend;
2977     n      = cend - cstart;
2978   }
2979 
2980   /* loop over local rows, determining number of off diagonal entries */
2981   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2982   jj   = 0;
2983   for (i=0; i<m; i++) {
2984     for (j=0; j<ourlens[i]; j++) {
2985       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2986       jj++;
2987     }
2988   }
2989 
2990   for (i=0; i<m; i++) {
2991     ourlens[i] -= offlens[i];
2992   }
2993   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2994 
2995   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2996 
2997   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2998 
2999   for (i=0; i<m; i++) {
3000     ourlens[i] += offlens[i];
3001   }
3002 
3003   if (!rank) {
3004     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3005 
3006     /* read in my part of the matrix numerical values  */
3007     nz   = procsnz[0];
3008     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3009 
3010     /* insert into matrix */
3011     jj      = rstart;
3012     smycols = mycols;
3013     svals   = vals;
3014     for (i=0; i<m; i++) {
3015       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3016       smycols += ourlens[i];
3017       svals   += ourlens[i];
3018       jj++;
3019     }
3020 
3021     /* read in other processors and ship out */
3022     for (i=1; i<size; i++) {
3023       nz   = procsnz[i];
3024       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3025       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3026     }
3027     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3028   } else {
3029     /* receive numeric values */
3030     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3031 
3032     /* receive message of values*/
3033     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3034 
3035     /* insert into matrix */
3036     jj      = rstart;
3037     smycols = mycols;
3038     svals   = vals;
3039     for (i=0; i<m; i++) {
3040       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3041       smycols += ourlens[i];
3042       svals   += ourlens[i];
3043       jj++;
3044     }
3045   }
3046   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3047   ierr = PetscFree(vals);CHKERRQ(ierr);
3048   ierr = PetscFree(mycols);CHKERRQ(ierr);
3049   ierr = PetscFree(rowners);CHKERRQ(ierr);
3050   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3051   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3052   PetscFunctionReturn(0);
3053 }
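
/*
   A minimal usage sketch (the helper name and the file name "matrix.dat" are
   placeholders): loading a MATMPIAIJ matrix from a PETSc binary file, which
   dispatches to MatLoad_MPIAIJ() above.
*/
static PetscErrorCode ExampleLoadMPIAIJ(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatLoad(*A,viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}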
3054 
3055 #undef __FUNCT__
3056 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3057 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */
3058 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3059 {
3060   PetscErrorCode ierr;
3061   IS             iscol_local;
3062   PetscInt       csize;
3063 
3064   PetscFunctionBegin;
3065   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3066   if (call == MAT_REUSE_MATRIX) {
3067     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3068     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3069   } else {
3070     /* check if we are grabbing all columns*/
3071     PetscBool    isstride;
3072     PetscMPIInt  lisstride = 0,gisstride;
3073     ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3074     if (isstride) {
3075       PetscInt  start,len,mstart,mlen;
3076       ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3077       ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3078       ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3079       if (mstart == start && mlen-mstart == len) lisstride = 1;
3080     }
3081     ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3082     if (gisstride) {
3083       PetscInt N;
3084       ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3085       ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3086       ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3087       ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3088     } else {
3089       PetscInt cbs;
3090       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3091       ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3092       ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3093     }
3094   }
3095   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3096   if (call == MAT_INITIAL_MATRIX) {
3097     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3098     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3099   }
3100   PetscFunctionReturn(0);
3101 }
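
/*
   A minimal usage sketch (the helper name is a placeholder): extracting a parallel
   submatrix once with MAT_INITIAL_MATRIX and then refreshing its values with
   MAT_REUSE_MATRIX, which relies on the "ISAllGather" object composed on the
   submatrix above.
*/
static PetscErrorCode ExampleGetSubMatrixTwice(Mat A,IS isrow,IS iscol,Mat *Asub)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,Asub);CHKERRQ(ierr);
  /* ... the numerical values of A change, but not its nonzero pattern ... */
  ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,Asub);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}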
3102 
3103 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3104 #undef __FUNCT__
3105 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3106 /*
3107     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3108   copy, and then the final result obtained by concatenating the local matrices.
3109   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3110 
3111   Note: This requires a sequential iscol with all indices.
3112 */
3113 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3114 {
3115   PetscErrorCode ierr;
3116   PetscMPIInt    rank,size;
3117   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3118   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3119   PetscBool      allcolumns, colflag;
3120   Mat            M,Mreuse;
3121   MatScalar      *vwork,*aa;
3122   MPI_Comm       comm;
3123   Mat_SeqAIJ     *aij;
3124 
3125   PetscFunctionBegin;
3126   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3127   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3128   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3129 
3130   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3131   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3132   if (colflag && ncol == mat->cmap->N) {
3133     allcolumns = PETSC_TRUE;
3134     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr);
3135   } else {
3136     allcolumns = PETSC_FALSE;
3137   }
3138   if (call ==  MAT_REUSE_MATRIX) {
3139     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3140     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3141     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3142   } else {
3143     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3144   }
3145 
3146   /*
3147       m - number of local rows
3148       n - number of columns (same on all processors)
3149       rstart - first row in new global matrix generated
3150   */
3151   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3152   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3153   if (call == MAT_INITIAL_MATRIX) {
3154     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3155     ii  = aij->i;
3156     jj  = aij->j;
3157 
3158     /*
3159         Determine the number of non-zeros in the diagonal and off-diagonal
3160         portions of the matrix in order to do correct preallocation
3161     */
3162 
3163     /* first get start and end of "diagonal" columns */
3164     if (csize == PETSC_DECIDE) {
3165       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3166       if (mglobal == n) { /* square matrix */
3167         nlocal = m;
3168       } else {
3169         nlocal = n/size + ((n % size) > rank);
3170       }
3171     } else {
3172       nlocal = csize;
3173     }
3174     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3175     rstart = rend - nlocal;
3176     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3177 
3178     /* next, compute all the lengths */
3179     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3180     olens = dlens + m;
3181     for (i=0; i<m; i++) {
3182       jend = ii[i+1] - ii[i];
3183       olen = 0;
3184       dlen = 0;
3185       for (j=0; j<jend; j++) {
3186         if (*jj < rstart || *jj >= rend) olen++;
3187         else dlen++;
3188         jj++;
3189       }
3190       olens[i] = olen;
3191       dlens[i] = dlen;
3192     }
3193     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3194     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3195     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3196     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3197     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3198     ierr = PetscFree(dlens);CHKERRQ(ierr);
3199   } else {
3200     PetscInt ml,nl;
3201 
3202     M    = *newmat;
3203     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3204     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3205     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3206     /*
3207          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3208        rather than the slower MatSetValues().
3209     */
3210     M->was_assembled = PETSC_TRUE;
3211     M->assembled     = PETSC_FALSE;
3212   }
3213   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3214   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3215   ii   = aij->i;
3216   jj   = aij->j;
3217   aa   = aij->a;
3218   for (i=0; i<m; i++) {
3219     row   = rstart + i;
3220     nz    = ii[i+1] - ii[i];
3221     cwork = jj;     jj += nz;
3222     vwork = aa;     aa += nz;
3223     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3224   }
3225 
3226   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3227   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3228   *newmat = M;
3229 
3230   /* save submatrix used in processor for next request */
3231   if (call ==  MAT_INITIAL_MATRIX) {
3232     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3233     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3234   }
3235   PetscFunctionReturn(0);
3236 }
3237 
3238 #undef __FUNCT__
3239 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3240 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3241 {
3242   PetscInt       m,cstart, cend,j,nnz,i,d;
3243   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3244   const PetscInt *JJ;
3245   PetscScalar    *values;
3246   PetscErrorCode ierr;
3247 
3248   PetscFunctionBegin;
3249   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3250 
3251   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3252   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3253   m      = B->rmap->n;
3254   cstart = B->cmap->rstart;
3255   cend   = B->cmap->rend;
3256   rstart = B->rmap->rstart;
3257 
3258   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3259 
3260 #if defined(PETSC_USE_DEBUG)
3261   for (i=0; i<m; i++) {
3262     nnz = Ii[i+1]- Ii[i];
3263     JJ  = J + Ii[i];
3264     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3265     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with a negative column index",i);
3266     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3267   }
3268 #endif
3269 
3270   for (i=0; i<m; i++) {
3271     nnz     = Ii[i+1]- Ii[i];
3272     JJ      = J + Ii[i];
3273     nnz_max = PetscMax(nnz_max,nnz);
3274     d       = 0;
3275     for (j=0; j<nnz; j++) {
3276       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3277     }
3278     d_nnz[i] = d;
3279     o_nnz[i] = nnz - d;
3280   }
3281   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3282   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3283 
3284   if (v) values = (PetscScalar*)v;
3285   else {
3286     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3287   }
3288 
3289   for (i=0; i<m; i++) {
3290     ii   = i + rstart;
3291     nnz  = Ii[i+1]- Ii[i];
3292     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3293   }
3294   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3295   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3296 
3297   if (!v) {
3298     ierr = PetscFree(values);CHKERRQ(ierr);
3299   }
3300   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3301   PetscFunctionReturn(0);
3302 }
3303 
3304 #undef __FUNCT__
3305 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3306 /*@
3307    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3308    (the default parallel PETSc format).
3309 
3310    Collective on MPI_Comm
3311 
3312    Input Parameters:
3313 +  B - the matrix
3314 .  i - the indices into j for the start of each local row (starts with zero)
3315 .  j - the column indices for each local row (starts with zero)
3316 -  v - optional values in the matrix
3317 
3318    Level: developer
3319 
3320    Notes:
3321        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3322      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3323      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3324 
3325        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3326 
3327        The format used for the sparse matrix input is equivalent to a
3328     row-major ordering, i.e. for the following matrix, the expected input data is
3329     as shown
3330 
3331 $        1 0 0
3332 $        2 0 3     P0
3333 $       -------
3334 $        4 5 6     P1
3335 $
3336 $     Process0 [P0]: rows_owned=[0,1]
3337 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3338 $        j =  {0,0,2}  [size = 3]
3339 $        v =  {1,2,3}  [size = 3]
3340 $
3341 $     Process1 [P1]: rows_owned=[2]
3342 $        i =  {0,3}    [size = nrow+1  = 1+1]
3343 $        j =  {0,1,2}  [size = 3]
3344 $        v =  {4,5,6}  [size = 3]
3345 
3346 .keywords: matrix, aij, compressed row, sparse, parallel
3347 
3348 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3349           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3350 @*/
3351 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3352 {
3353   PetscErrorCode ierr;
3354 
3355   PetscFunctionBegin;
3356   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3357   PetscFunctionReturn(0);
3358 }
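
/*
   A minimal usage sketch (the helper name is a placeholder) of the two-process example
   in the manual page above, as seen from process 0: B is assumed to have been created
   with MatCreate(), MatSetSizes(B,2,PETSC_DECIDE,3,3) and MatSetType(B,MATMPIAIJ);
   process 1 makes the matching collective call with its own arrays.
*/
static PetscErrorCode ExamplePreallocationCSR_P0(Mat B)
{
  PetscErrorCode    ierr;
  const PetscInt    i[] = {0,1,3};        /* row pointers for the 2 local rows */
  const PetscInt    j[] = {0,0,2};        /* global column indices             */
  const PetscScalar v[] = {1.0,2.0,3.0};  /* values, row by row                */

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}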
3359 
3360 #undef __FUNCT__
3361 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3362 /*@C
3363    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3364    (the default parallel PETSc format).  For good matrix assembly performance
3365    the user should preallocate the matrix storage by setting the parameters
3366    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3367    performance can be increased by more than a factor of 50.
3368 
3369    Collective on MPI_Comm
3370 
3371    Input Parameters:
3372 +  B - the matrix
3373 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3374            (same value is used for all local rows)
3375 .  d_nnz - array containing the number of nonzeros in the various rows of the
3376            DIAGONAL portion of the local submatrix (possibly different for each row)
3377            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3378            The size of this array is equal to the number of local rows, i.e 'm'.
3379            For matrices that will be factored, you must leave room for (and set)
3380            the diagonal entry even if it is zero.
3381 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3382            submatrix (same value is used for all local rows).
3383 -  o_nnz - array containing the number of nonzeros in the various rows of the
3384            OFF-DIAGONAL portion of the local submatrix (possibly different for
3385            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3386            structure. The size of this array is equal to the number
3387            of local rows, i.e 'm'.
3388 
3389    If the *_nnz parameter is given then the *_nz parameter is ignored
3390 
3391    The AIJ format (also called the Yale sparse matrix format or
3392    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3393    storage.  The stored row and column indices begin with zero.
3394    See Users-Manual: ch_mat for details.
3395 
3396    The parallel matrix is partitioned such that the first m0 rows belong to
3397    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3398    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3399 
3400    The DIAGONAL portion of the local submatrix of a processor can be defined
3401    as the submatrix which is obtained by extracting the part corresponding to
3402    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3403    first row that belongs to the processor, r2 is the last row belonging to
3404    this processor, and c1-c2 is the range of indices of the local part of a
3405    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3406    common case of a square matrix, the row and column ranges are the same and
3407    the DIAGONAL part is also square. The remaining portion of the local
3408    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3409 
3410    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3411 
3412    You can call MatGetInfo() to get information on how effective the preallocation was;
3413    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3414    You can also run with the option -info and look for messages with the string
3415    malloc in them to see if additional memory allocation was needed.
3416 
3417    Example usage:
3418 
3419    Consider the following 8x8 matrix with 34 non-zero values, that is
3420    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3421    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3422    as follows:
3423 
3424 .vb
3425             1  2  0  |  0  3  0  |  0  4
3426     Proc0   0  5  6  |  7  0  0  |  8  0
3427             9  0 10  | 11  0  0  | 12  0
3428     -------------------------------------
3429            13  0 14  | 15 16 17  |  0  0
3430     Proc1   0 18  0  | 19 20 21  |  0  0
3431             0  0  0  | 22 23  0  | 24  0
3432     -------------------------------------
3433     Proc2  25 26 27  |  0  0 28  | 29  0
3434            30  0  0  | 31 32 33  |  0 34
3435 .ve
3436 
3437    This can be represented as a collection of submatrices as:
3438 
3439 .vb
3440       A B C
3441       D E F
3442       G H I
3443 .ve
3444 
3445    Where the submatrices A,B,C are owned by proc0, D,E,F are
3446    owned by proc1, G,H,I are owned by proc2.
3447 
3448    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3449    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3450    The 'M','N' parameters are 8,8, and have the same values on all procs.
3451 
3452    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3453    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3454    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3455    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3456    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3457    matrix, ans [DF] as another SeqAIJ matrix.
3458 
3459    When d_nz, o_nz parameters are specified, d_nz storage elements are
3460    allocated for every row of the local diagonal submatrix, and o_nz
3461    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3462    One way to choose d_nz and o_nz is to use the max nonzeros per local
3463    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3464    In this case, the values of d_nz,o_nz are:
3465 .vb
3466      proc0 : dnz = 2, o_nz = 2
3467      proc1 : dnz = 3, o_nz = 2
3468      proc2 : dnz = 1, o_nz = 4
3469 .ve
3470    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3471    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3472    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3473    34 values.
3474 
3475    When d_nnz, o_nnz parameters are specified, the storage is specified
3476    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3477    In the above case the values for d_nnz,o_nnz are:
3478 .vb
3479      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3480      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3481      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3482 .ve
3483    Here the space allocated is the sum of all the above values, i.e. 34, and
3484    hence the preallocation is perfect.
3485 
3486    Level: intermediate
3487 
3488 .keywords: matrix, aij, compressed row, sparse, parallel
3489 
3490 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3491           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
3492 @*/
3493 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3494 {
3495   PetscErrorCode ierr;
3496 
3497   PetscFunctionBegin;
3498   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3499   PetscValidType(B,1);
3500   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3501   PetscFunctionReturn(0);
3502 }
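
/*
   A minimal usage sketch (the helper name is a placeholder) of the 8x8, three-process
   example in the manual page above, using the per-row d_nnz/o_nnz arrays: B is assumed
   to have been created with MatCreate(), MatSetSizes() (3, 3 and 2 local rows) and
   MatSetType(B,MATMPIAIJ) on a three-process communicator.
*/
static PetscErrorCode ExamplePreallocate8x8(Mat B)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  const PetscInt d0[] = {2,2,2},o0[] = {2,2,2};
  const PetscInt d1[] = {3,3,2},o1[] = {2,1,1};
  const PetscInt d2[] = {1,1},  o2[] = {4,4};

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&rank);CHKERRQ(ierr);
  if (rank == 0)      {ierr = MatMPIAIJSetPreallocation(B,0,d0,0,o0);CHKERRQ(ierr);}
  else if (rank == 1) {ierr = MatMPIAIJSetPreallocation(B,0,d1,0,o1);CHKERRQ(ierr);}
  else                {ierr = MatMPIAIJSetPreallocation(B,0,d2,0,o2);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}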
3503 
3504 #undef __FUNCT__
3505 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3506 /*@
3507      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3508          CSR format the local rows.
3509 
3510    Collective on MPI_Comm
3511 
3512    Input Parameters:
3513 +  comm - MPI communicator
3514 .  m - number of local rows (Cannot be PETSC_DECIDE)
3515 .  n - This value should be the same as the local size used in creating the
3516        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3517        calculated if N is given) For square matrices n is almost always m.
3518 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3519 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3520 .   i - row indices
3521 .   j - column indices
3522 -   a - matrix values
3523 
3524    Output Parameter:
3525 .   mat - the matrix
3526 
3527    Level: intermediate
3528 
3529    Notes:
3530        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3531      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3532      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3533 
3534        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3535 
3536        The format which is used for the sparse matrix input, is equivalent to a
3537     row-major ordering.. i.e for the following matrix, the input data expected is
3538     as shown
3539 
3540 $        1 0 0
3541 $        2 0 3     P0
3542 $       -------
3543 $        4 5 6     P1
3544 $
3545 $     Process0 [P0]: rows_owned=[0,1]
3546 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3547 $        j =  {0,0,2}  [size = 3]
3548 $        v =  {1,2,3}  [size = 3]
3549 $
3550 $     Process1 [P1]: rows_owned=[2]
3551 $        i =  {0,3}    [size = nrow+1  = 1+1]
3552 $        j =  {0,1,2}  [size = 3]
3553 $        v =  {4,5,6}  [size = 3]
3554 
3555 .keywords: matrix, aij, compressed row, sparse, parallel
3556 
3557 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3558           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3559 @*/
3560 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3561 {
3562   PetscErrorCode ierr;
3563 
3564   PetscFunctionBegin;
3565   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3566   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3567   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3568   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3569   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3570   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3571   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3572   PetscFunctionReturn(0);
3573 }
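
/*
   A minimal usage sketch (the helper name is a placeholder) of the two-process example
   in the manual page above: each process passes the CSR arrays for its own rows and the
   whole 3x3 matrix is created and assembled in one collective call.
*/
static PetscErrorCode ExampleCreateWithArrays(MPI_Comm comm,Mat *A)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  const PetscInt    i0[] = {0,1,3},j0[] = {0,0,2};
  const PetscScalar v0[] = {1.0,2.0,3.0};
  const PetscInt    i1[] = {0,3},  j1[] = {0,1,2};
  const PetscScalar v1[] = {4.0,5.0,6.0};

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i0,j0,v0,A);CHKERRQ(ierr);}
  else       {ierr = MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,3,3,i1,j1,v1,A);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}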
3574 
3575 #undef __FUNCT__
3576 #define __FUNCT__ "MatCreateAIJ"
3577 /*@C
3578    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3579    (the default parallel PETSc format).  For good matrix assembly performance
3580    the user should preallocate the matrix storage by setting the parameters
3581    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3582    performance can be increased by more than a factor of 50.
3583 
3584    Collective on MPI_Comm
3585 
3586    Input Parameters:
3587 +  comm - MPI communicator
3588 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3589            This value should be the same as the local size used in creating the
3590            y vector for the matrix-vector product y = Ax.
3591 .  n - This value should be the same as the local size used in creating the
3592        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3593        calculated if N is given) For square matrices n is almost always m.
3594 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3595 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3596 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3597            (same value is used for all local rows)
3598 .  d_nnz - array containing the number of nonzeros in the various rows of the
3599            DIAGONAL portion of the local submatrix (possibly different for each row)
3600            or NULL, if d_nz is used to specify the nonzero structure.
3601            The size of this array is equal to the number of local rows, i.e 'm'.
3602 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3603            submatrix (same value is used for all local rows).
3604 -  o_nnz - array containing the number of nonzeros in the various rows of the
3605            OFF-DIAGONAL portion of the local submatrix (possibly different for
3606            each row) or NULL, if o_nz is used to specify the nonzero
3607            structure. The size of this array is equal to the number
3608            of local rows, i.e 'm'.
3609 
3610    Output Parameter:
3611 .  A - the matrix
3612 
3613    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3614    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3615    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3616 
3617    Notes:
3618    If the *_nnz parameter is given then the *_nz parameter is ignored
3619 
3620    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3621    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3622    storage requirements for this matrix.
3623 
3624    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3625    processor then it must be used on all processors that share the object for
3626    that argument.
3627 
3628    The user MUST specify either the local or global matrix dimensions
3629    (possibly both).
3630 
3631    The parallel matrix is partitioned across processors such that the
3632    first m0 rows belong to process 0, the next m1 rows belong to
3633    process 1, the next m2 rows belong to process 2, etc., where
3634    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
3635    values corresponding to an [m x N] submatrix.
3636 
3637    The columns are logically partitioned with the n0 columns belonging
3638    to 0th partition, the next n1 columns belonging to the next
3639    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3640 
3641    The DIAGONAL portion of the local submatrix on any given processor
3642    is the submatrix corresponding to the rows and columns m,n
3643    corresponding to the given processor, i.e. the diagonal matrix on
3644    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
3645    etc. The remaining portion of the local submatrix [m x (N-n)]
3646    constitutes the OFF-DIAGONAL portion. The example below better
3647    illustrates this concept.
3648 
3649    For a square global matrix we define each processor's diagonal portion
3650    to be its local rows and the corresponding columns (a square submatrix);
3651    each processor's off-diagonal portion encompasses the remainder of the
3652    local matrix (a rectangular submatrix).
3653 
3654    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3655 
3656    When calling this routine with a single process communicator, a matrix of
3657    type SEQAIJ is returned.  If a matrix of type MATMPIAIJ is desired for this
3658    type of communicator, use the construction mechanism:
3659      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3660 
3661    By default, this format uses inodes (identical nodes) when possible.
3662    We search for consecutive rows with the same nonzero structure, thereby
3663    reusing matrix information to achieve increased efficiency.
3664 
3665    Options Database Keys:
3666 +  -mat_no_inode  - Do not use inodes
3667 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3668 -  -mat_aij_oneindex - Internally use indexing starting at 1
3669         rather than 0.  Note that when calling MatSetValues(),
3670         the user still MUST index entries starting at 0!
3671 
3672 
3673    Example usage:
3674 
3675    Consider the following 8x8 matrix with 34 non-zero values, that is
3676    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3677    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3678    as follows:
3679 
3680 .vb
3681             1  2  0  |  0  3  0  |  0  4
3682     Proc0   0  5  6  |  7  0  0  |  8  0
3683             9  0 10  | 11  0  0  | 12  0
3684     -------------------------------------
3685            13  0 14  | 15 16 17  |  0  0
3686     Proc1   0 18  0  | 19 20 21  |  0  0
3687             0  0  0  | 22 23  0  | 24  0
3688     -------------------------------------
3689     Proc2  25 26 27  |  0  0 28  | 29  0
3690            30  0  0  | 31 32 33  |  0 34
3691 .ve
3692 
3693    This can be represented as a collection of submatrices as:
3694 
3695 .vb
3696       A B C
3697       D E F
3698       G H I
3699 .ve
3700 
3701    Where the submatrices A,B,C are owned by proc0, D,E,F are
3702    owned by proc1, G,H,I are owned by proc2.
3703 
3704    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3705    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3706    The 'M','N' parameters are 8,8, and have the same values on all procs.
3707 
3708    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3709    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3710    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3711    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3712    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3713    matrix, ans [DF] as another SeqAIJ matrix.
3714 
3715    When d_nz, o_nz parameters are specified, d_nz storage elements are
3716    allocated for every row of the local diagonal submatrix, and o_nz
3717    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3718    One way to choose d_nz and o_nz is to use the max nonzeros per local
3719    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3720    In this case, the values of d_nz,o_nz are:
3721 .vb
3722      proc0 : dnz = 2, o_nz = 2
3723      proc1 : dnz = 3, o_nz = 2
3724      proc2 : dnz = 1, o_nz = 4
3725 .ve
3726    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3727    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3728    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3729    34 values.
3730 
3731    When d_nnz, o_nnz parameters are specified, the storage is specified
3732    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3733    In the above case the values for d_nnz,o_nnz are:
3734 .vb
3735      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3736      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3737      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3738 .ve
3739    Here the space allocated is the sum of all the above values, i.e. 34, and
3740    hence the preallocation is perfect.
3741 
3742    Level: intermediate
3743 
3744 .keywords: matrix, aij, compressed row, sparse, parallel
3745 
3746 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3747           MATMPIAIJ, MatCreateMPIAIJWithArrays()
3748 @*/
3749 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3750 {
3751   PetscErrorCode ierr;
3752   PetscMPIInt    size;
3753 
3754   PetscFunctionBegin;
3755   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3756   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3757   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3758   if (size > 1) {
3759     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3760     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3761   } else {
3762     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3763     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3764   }
3765   PetscFunctionReturn(0);
3766 }
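
/*
   A minimal usage sketch (the helper name is a placeholder) of the 8x8, three-process
   example in the manual page above, using the scalar d_nz/o_nz upper bounds per process.
*/
static PetscErrorCode ExampleCreateAIJ8x8(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       m,d_nz,o_nz;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (rank == 0)      {m = 3; d_nz = 2; o_nz = 2;}
  else if (rank == 1) {m = 3; d_nz = 3; o_nz = 2;}
  else                {m = 2; d_nz = 1; o_nz = 4;}
  ierr = MatCreateAIJ(comm,m,m,8,8,d_nz,NULL,o_nz,NULL,A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}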
3767 
3768 #undef __FUNCT__
3769 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3770 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3771 {
3772   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3773   PetscBool      flg;
3774   PetscErrorCode ierr;
3775 
3776   PetscFunctionBegin;
3777   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
3778   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
3779   if (Ad)     *Ad     = a->A;
3780   if (Ao)     *Ao     = a->B;
3781   if (colmap) *colmap = a->garray;
3782   PetscFunctionReturn(0);
3783 }
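
/*
   A minimal usage sketch (the helper name is a placeholder): inspecting the diagonal and
   off-diagonal SeqAIJ blocks of a MATMPIAIJ matrix; colmap[] (the garray) translates the
   local column indices of the off-diagonal block Ao back to global column numbers.
*/
static PetscErrorCode ExampleInspectBlocks(Mat A)
{
  PetscErrorCode ierr;
  Mat            Ad,Ao;
  const PetscInt *colmap;
  PetscInt       nd,no;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
  ierr = MatGetSize(Ad,NULL,&nd);CHKERRQ(ierr);
  ierr = MatGetSize(Ao,NULL,&no);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_SELF,"diagonal block: %D columns, off-diagonal block: %D columns\n",nd,no);CHKERRQ(ierr);
  if (no) {ierr = PetscPrintf(PETSC_COMM_SELF,"first off-diagonal global column: %D\n",colmap[0]);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}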
3784 
3785 #undef __FUNCT__
3786 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
3787 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3788 {
3789   PetscErrorCode ierr;
3790   PetscInt       m,N,i,rstart,nnz,Ii;
3791   PetscInt       *indx;
3792   PetscScalar    *values;
3793 
3794   PetscFunctionBegin;
3795   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3796   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3797     PetscInt       *dnz,*onz,sum,bs,cbs;
3798 
3799     if (n == PETSC_DECIDE) {
3800       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3801     }
3802     /* Check sum(n) = N */
3803     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3804     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",N);
3805 
3806     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3807     rstart -= m;
3808 
3809     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3810     for (i=0; i<m; i++) {
3811       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3812       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3813       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3814     }
3815 
3816     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3817     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3818     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3819     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3820     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3821     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3822     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3823   }
3824 
3825   /* numeric phase */
3826   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3827   for (i=0; i<m; i++) {
3828     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3829     Ii   = i + rstart;
3830     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3831     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3832   }
3833   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3834   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3835   PetscFunctionReturn(0);
3836 }
3837 
3838 #undef __FUNCT__
3839 #define __FUNCT__ "MatFileSplit"
3840 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3841 {
3842   PetscErrorCode    ierr;
3843   PetscMPIInt       rank;
3844   PetscInt          m,N,i,rstart,nnz;
3845   size_t            len;
3846   const PetscInt    *indx;
3847   PetscViewer       out;
3848   char              *name;
3849   Mat               B;
3850   const PetscScalar *values;
3851 
3852   PetscFunctionBegin;
3853   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3854   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3855   /* Should this be the type of the diagonal block of A? */
3856   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3857   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3858   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3859   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3860   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3861   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3862   for (i=0; i<m; i++) {
3863     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3864     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3865     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3866   }
3867   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3868   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3869 
3870   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3871   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3872   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
3873   sprintf(name,"%s.%d",outfile,rank);
3874   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3875   ierr = PetscFree(name);CHKERRQ(ierr);
3876   ierr = MatView(B,out);CHKERRQ(ierr);
3877   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3878   ierr = MatDestroy(&B);CHKERRQ(ierr);
3879   PetscFunctionReturn(0);
3880 }
3881 
3882 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3883 #undef __FUNCT__
3884 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3885 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3886 {
3887   PetscErrorCode      ierr;
3888   Mat_Merge_SeqsToMPI *merge;
3889   PetscContainer      container;
3890 
3891   PetscFunctionBegin;
3892   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3893   if (container) {
3894     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3895     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3896     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3897     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3898     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3899     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3900     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3901     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3902     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3903     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3904     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3905     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3906     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3907     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3908     ierr = PetscFree(merge);CHKERRQ(ierr);
3909     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3910   }
3911   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3912   PetscFunctionReturn(0);
3913 }
3914 
3915 #include <../src/mat/utils/freespace.h>
3916 #include <petscbt.h>
3917 
3918 #undef __FUNCT__
3919 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
3920 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
3921 {
3922   PetscErrorCode      ierr;
3923   MPI_Comm            comm;
3924   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
3925   PetscMPIInt         size,rank,taga,*len_s;
3926   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
3927   PetscInt            proc,m;
3928   PetscInt            **buf_ri,**buf_rj;
3929   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3930   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
3931   MPI_Request         *s_waits,*r_waits;
3932   MPI_Status          *status;
3933   MatScalar           *aa=a->a;
3934   MatScalar           **abuf_r,*ba_i;
3935   Mat_Merge_SeqsToMPI *merge;
3936   PetscContainer      container;
3937 
3938   PetscFunctionBegin;
3939   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
3940   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3941 
3942   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3943   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3944 
3945   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3946   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3947 
3948   bi     = merge->bi;
3949   bj     = merge->bj;
3950   buf_ri = merge->buf_ri;
3951   buf_rj = merge->buf_rj;
3952 
3953   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
3954   owners = merge->rowmap->range;
3955   len_s  = merge->len_s;
3956 
3957   /* send and recv matrix values */
3958   /*-----------------------------*/
3959   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3960   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3961 
3962   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
3963   for (proc=0,k=0; proc<size; proc++) {
3964     if (!len_s[proc]) continue;
3965     i    = owners[proc];
3966     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3967     k++;
3968   }
3969 
3970   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3971   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3972   ierr = PetscFree(status);CHKERRQ(ierr);
3973 
3974   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3975   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3976 
3977   /* insert mat values of mpimat */
3978   /*----------------------------*/
3979   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
3980   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
3981 
3982   for (k=0; k<merge->nrecv; k++) {
3983     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3984     nrows       = *(buf_ri_k[k]);
3985     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3986     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
3987   }
3988 
3989   /* set values of ba */
3990   m = merge->rowmap->n;
3991   for (i=0; i<m; i++) {
3992     arow = owners[rank] + i;
3993     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3994     bnzi = bi[i+1] - bi[i];
3995     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
3996 
3997     /* add local non-zero vals of this proc's seqmat into ba */
3998     anzi   = ai[arow+1] - ai[arow];
3999     aj     = a->j + ai[arow];
4000     aa     = a->a + ai[arow];
4001     nextaj = 0;
4002     for (j=0; nextaj<anzi; j++) {
4003       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4004         ba_i[j] += aa[nextaj++];
4005       }
4006     }
4007 
4008     /* add received vals into ba */
4009     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4010       /* i-th row */
4011       if (i == *nextrow[k]) {
4012         anzi   = *(nextai[k]+1) - *nextai[k];
4013         aj     = buf_rj[k] + *(nextai[k]);
4014         aa     = abuf_r[k] + *(nextai[k]);
4015         nextaj = 0;
4016         for (j=0; nextaj<anzi; j++) {
4017           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4018             ba_i[j] += aa[nextaj++];
4019           }
4020         }
4021         nextrow[k]++; nextai[k]++;
4022       }
4023     }
4024     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4025   }
4026   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4027   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4028 
4029   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4030   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4031   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4032   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4033   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4034   PetscFunctionReturn(0);
4035 }
4036 
4037 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4038 
4039 #undef __FUNCT__
4040 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4041 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4042 {
4043   PetscErrorCode      ierr;
4044   Mat                 B_mpi;
4045   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4046   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4047   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4048   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4049   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4050   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4051   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4052   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4053   MPI_Status          *status;
4054   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4055   PetscBT             lnkbt;
4056   Mat_Merge_SeqsToMPI *merge;
4057   PetscContainer      container;
4058 
4059   PetscFunctionBegin;
4060   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4061 
4062   /* make sure it is a PETSc comm */
4063   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4064   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4065   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4066 
4067   ierr = PetscNew(&merge);CHKERRQ(ierr);
4068   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4069 
4070   /* determine row ownership */
4071   /*---------------------------------------------------------*/
4072   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4073   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4074   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4075   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4076   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4077   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4078   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4079 
4080   m      = merge->rowmap->n;
4081   owners = merge->rowmap->range;
4082 
4083   /* determine the number of messages to send, their lengths */
4084   /*---------------------------------------------------------*/
4085   len_s = merge->len_s;
4086 
4087   len          = 0; /* length of buf_si[] */
4088   merge->nsend = 0;
4089   for (proc=0; proc<size; proc++) {
4090     len_si[proc] = 0;
4091     if (proc == rank) {
4092       len_s[proc] = 0;
4093     } else {
4094       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4095       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4096     }
4097     if (len_s[proc]) {
4098       merge->nsend++;
4099       nrows = 0;
4100       for (i=owners[proc]; i<owners[proc+1]; i++) {
4101         if (ai[i+1] > ai[i]) nrows++;
4102       }
4103       len_si[proc] = 2*(nrows+1);
4104       len         += len_si[proc];
4105     }
4106   }
4107 
4108   /* determine the number and length of messages to receive for ij-structure */
4109   /*-------------------------------------------------------------------------*/
4110   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4111   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4112 
4113   /* post the Irecv of j-structure */
4114   /*-------------------------------*/
4115   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4116   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4117 
4118   /* post the Isend of j-structure */
4119   /*--------------------------------*/
4120   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4121 
4122   for (proc=0, k=0; proc<size; proc++) {
4123     if (!len_s[proc]) continue;
4124     i    = owners[proc];
4125     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4126     k++;
4127   }
4128 
4129   /* receives and sends of j-structure are complete */
4130   /*------------------------------------------------*/
4131   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4132   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4133 
4134   /* send and recv i-structure */
4135   /*---------------------------*/
4136   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4137   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4138 
4139   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4140   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4141   for (proc=0,k=0; proc<size; proc++) {
4142     if (!len_s[proc]) continue;
4143     /* form outgoing message for i-structure:
4144          buf_si[0]:                 nrows to be sent
4145                [1:nrows]:           row index (local to the destination process)
4146                [nrows+1:2*nrows+1]: i-structure index
4147     */
4148     /*-------------------------------------------*/
4149     nrows       = len_si[proc]/2 - 1;
4150     buf_si_i    = buf_si + nrows+1;
4151     buf_si[0]   = nrows;
4152     buf_si_i[0] = 0;
4153     nrows       = 0;
4154     for (i=owners[proc]; i<owners[proc+1]; i++) {
4155       anzi = ai[i+1] - ai[i];
4156       if (anzi) {
4157         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4158         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4159         nrows++;
4160       }
4161     }
4162     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4163     k++;
4164     buf_si += len_si[proc];
4165   }
4166 
4167   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4168   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4169 
4170   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4171   for (i=0; i<merge->nrecv; i++) {
4172     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4173   }
4174 
4175   ierr = PetscFree(len_si);CHKERRQ(ierr);
4176   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4177   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4178   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4179   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4180   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4181   ierr = PetscFree(status);CHKERRQ(ierr);
4182 
4183   /* compute a local seq matrix in each processor */
4184   /*----------------------------------------------*/
4185   /* allocate bi array and free space for accumulating nonzero column info */
4186   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4187   bi[0] = 0;
4188 
4189   /* create and initialize a linked list */
4190   nlnk = N+1;
4191   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4192 
4193   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4194   len  = ai[owners[rank+1]] - ai[owners[rank]];
4195   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4196 
4197   current_space = free_space;
4198 
4199   /* determine symbolic info for each local row */
4200   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4201 
4202   for (k=0; k<merge->nrecv; k++) {
4203     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4204     nrows       = *buf_ri_k[k];
4205     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4206     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4207   }
4208 
4209   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4210   len  = 0;
4211   for (i=0; i<m; i++) {
4212     bnzi = 0;
4213     /* add local non-zero cols of this proc's seqmat into lnk */
4214     arow  = owners[rank] + i;
4215     anzi  = ai[arow+1] - ai[arow];
4216     aj    = a->j + ai[arow];
4217     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4218     bnzi += nlnk;
4219     /* add received col data into lnk */
4220     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4221       if (i == *nextrow[k]) { /* i-th row */
4222         anzi  = *(nextai[k]+1) - *nextai[k];
4223         aj    = buf_rj[k] + *nextai[k];
4224         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4225         bnzi += nlnk;
4226         nextrow[k]++; nextai[k]++;
4227       }
4228     }
4229     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4230 
4231     /* if free space is not available, make more free space */
4232     if (current_space->local_remaining<bnzi) {
4233       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4234       nspacedouble++;
4235     }
4236     /* copy data into free space, then initialize lnk */
4237     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4238     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4239 
4240     current_space->array           += bnzi;
4241     current_space->local_used      += bnzi;
4242     current_space->local_remaining -= bnzi;
4243 
4244     bi[i+1] = bi[i] + bnzi;
4245   }
4246 
4247   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4248 
4249   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4250   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4251   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4252 
4253   /* create symbolic parallel matrix B_mpi */
4254   /*---------------------------------------*/
4255   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4256   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4257   if (n==PETSC_DECIDE) {
4258     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4259   } else {
4260     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4261   }
4262   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4263   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4264   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4265   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4266   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4267 
4268   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4269   B_mpi->assembled    = PETSC_FALSE;
4270   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4271   merge->bi           = bi;
4272   merge->bj           = bj;
4273   merge->buf_ri       = buf_ri;
4274   merge->buf_rj       = buf_rj;
4275   merge->coi          = NULL;
4276   merge->coj          = NULL;
4277   merge->owners_co    = NULL;
4278 
4279   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4280 
4281   /* attach the supporting struct to B_mpi for reuse */
4282   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4283   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4284   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4285   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4286   *mpimat = B_mpi;
4287 
4288   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4289   PetscFunctionReturn(0);
4290 }
4291 
4292 #undef __FUNCT__
4293 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4294 /*@C
4295       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4296                  matrices from each processor
4297 
4298     Collective on MPI_Comm
4299 
4300    Input Parameters:
4301 +    comm - the communicator the parallel matrix will live on
4302 .    seqmat - the input sequential matrix (one per process)
4303 .    m - number of local rows (or PETSC_DECIDE)
4304 .    n - number of local columns (or PETSC_DECIDE)
4305 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4306 
4307    Output Parameter:
4308 .    mpimat - the parallel matrix generated
4309 
4310     Level: advanced
4311 
4312    Notes:
4313      The dimensions of the sequential matrix in each processor MUST be the same.
4314      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4315      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
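
     A minimal usage sketch (illustrative only; Mglobal, Nglobal, and nnz are placeholder names).
     Each process builds a sequential matrix of the full global size, fills and assembles it, and the
     per-process contributions are then summed into the parallel matrix; a later call with
     MAT_REUSE_MATRIX updates the values after seqmat changes (same nonzero pattern):
.vb
       Mat seqmat,mpimat;
       ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,Mglobal,Nglobal,0,nnz,&seqmat);CHKERRQ(ierr);
       ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
       ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
.ve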
4316 @*/
4317 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4318 {
4319   PetscErrorCode ierr;
4320   PetscMPIInt    size;
4321 
4322   PetscFunctionBegin;
4323   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4324   if (size == 1) {
4325     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4326     if (scall == MAT_INITIAL_MATRIX) {
4327       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4328     } else {
4329       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4330     }
4331     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4332     PetscFunctionReturn(0);
4333   }
4334   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4335   if (scall == MAT_INITIAL_MATRIX) {
4336     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4337   }
4338   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4339   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4340   PetscFunctionReturn(0);
4341 }
4342 
4343 #undef __FUNCT__
4344 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4345 /*@
4346      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4347           mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
4348           with MatGetSize().
4349 
4350     Not Collective
4351 
4352    Input Parameters:
4353 +    A - the matrix
4354 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4355 
4356    Output Parameter:
4357 .    A_loc - the local sequential matrix generated
4358 
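   Notes: a minimal usage sketch (illustrative only). The local matrix is created with
   MAT_INITIAL_MATRIX, refreshed in place with MAT_REUSE_MATRIX after the values of A change
   (same nonzero pattern), and destroyed by the caller when no longer needed:
.vb
      Mat A_loc;
      ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
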
4359     Level: developer
4360 
4361 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4362 
4363 @*/
4364 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4365 {
4366   PetscErrorCode ierr;
4367   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4368   Mat_SeqAIJ     *mat,*a,*b;
4369   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4370   MatScalar      *aa,*ba,*cam;
4371   PetscScalar    *ca;
4372   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4373   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4374   PetscBool      match;
4375   MPI_Comm       comm;
4376   PetscMPIInt    size;
4377 
4378   PetscFunctionBegin;
4379   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4380   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4381   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4382   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4383   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4384 
4385   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4386   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4387   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4388   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4389   aa = a->a; ba = b->a;
4390   if (scall == MAT_INITIAL_MATRIX) {
4391     if (size == 1) {
4392       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4393       PetscFunctionReturn(0);
4394     }
4395 
4396     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4397     ci[0] = 0;
4398     for (i=0; i<am; i++) {
4399       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4400     }
4401     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4402     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4403     k    = 0;
4404     for (i=0; i<am; i++) {
4405       ncols_o = bi[i+1] - bi[i];
4406       ncols_d = ai[i+1] - ai[i];
4407       /* off-diagonal portion of A */
4408       for (jo=0; jo<ncols_o; jo++) {
4409         col = cmap[*bj];
4410         if (col >= cstart) break;
4411         cj[k]   = col; bj++;
4412         ca[k++] = *ba++;
4413       }
4414       /* diagonal portion of A */
4415       for (j=0; j<ncols_d; j++) {
4416         cj[k]   = cstart + *aj++;
4417         ca[k++] = *aa++;
4418       }
4419       /* off-diagonal portion of A */
4420       for (j=jo; j<ncols_o; j++) {
4421         cj[k]   = cmap[*bj++];
4422         ca[k++] = *ba++;
4423       }
4424     }
4425     /* put together the new matrix */
4426     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4427     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4428     /* Since these are PETSc arrays, change flags to free them as necessary. */
4429     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4430     mat->free_a  = PETSC_TRUE;
4431     mat->free_ij = PETSC_TRUE;
4432     mat->nonew   = 0;
4433   } else if (scall == MAT_REUSE_MATRIX) {
4434     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4435     ci = mat->i; cj = mat->j; cam = mat->a;
4436     for (i=0; i<am; i++) {
4437       /* off-diagonal portion of A */
4438       ncols_o = bi[i+1] - bi[i];
4439       for (jo=0; jo<ncols_o; jo++) {
4440         col = cmap[*bj];
4441         if (col >= cstart) break;
4442         *cam++ = *ba++; bj++;
4443       }
4444       /* diagonal portion of A */
4445       ncols_d = ai[i+1] - ai[i];
4446       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4447       /* off-diagonal portion of A */
4448       for (j=jo; j<ncols_o; j++) {
4449         *cam++ = *ba++; bj++;
4450       }
4451     }
4452   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4453   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4454   PetscFunctionReturn(0);
4455 }
4456 
4457 #undef __FUNCT__
4458 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4459 /*@C
4460      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4461 
4462     Not Collective
4463 
4464    Input Parameters:
4465 +    A - the matrix
4466 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4467 -    row, col - index sets of rows and columns to extract (or NULL)
4468 
4469    Output Parameter:
4470 .    A_loc - the local sequential matrix generated
4471 
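   Notes: a minimal usage sketch (illustrative only); passing NULL for row and col selects all
   local rows and the default set of columns, and the caller destroys A_loc when done:
.vb
      Mat A_loc;
      ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
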
4472     Level: developer
4473 
4474 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4475 
4476 @*/
4477 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4478 {
4479   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4480   PetscErrorCode ierr;
4481   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4482   IS             isrowa,iscola;
4483   Mat            *aloc;
4484   PetscBool      match;
4485 
4486   PetscFunctionBegin;
4487   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4488   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4489   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4490   if (!row) {
4491     start = A->rmap->rstart; end = A->rmap->rend;
4492     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4493   } else {
4494     isrowa = *row;
4495   }
4496   if (!col) {
4497     start = A->cmap->rstart;
4498     cmap  = a->garray;
4499     nzA   = a->A->cmap->n;
4500     nzB   = a->B->cmap->n;
4501     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4502     ncols = 0;
4503     for (i=0; i<nzB; i++) {
4504       if (cmap[i] < start) idx[ncols++] = cmap[i];
4505       else break;
4506     }
4507     imark = i;
4508     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4509     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4510     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4511   } else {
4512     iscola = *col;
4513   }
4514   if (scall != MAT_INITIAL_MATRIX) {
4515     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4516     aloc[0] = *A_loc;
4517   }
4518   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4519   *A_loc = aloc[0];
4520   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4521   if (!row) {
4522     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4523   }
4524   if (!col) {
4525     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4526   }
4527   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4528   PetscFunctionReturn(0);
4529 }
4530 
4531 #undef __FUNCT__
4532 #define __FUNCT__ "MatGetBrowsOfAcols"
4533 /*@C
4534     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4535 
4536     Collective on Mat
4537 
4538    Input Parameters:
4539 +    A,B - the matrices in mpiaij format
4540 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4541 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4542 
4543    Output Parameter:
4544 +    rowb, colb - index sets of rows and columns of B to extract
4545 -    B_seq - the sequential matrix generated
4546 
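   Notes: a minimal usage sketch (illustrative only). With MAT_INITIAL_MATRIX the index sets are
   created and returned so they can be passed back in on a later MAT_REUSE_MATRIX call; the
   caller destroys rowb, colb, and B_seq when done:
.vb
      IS  rowb,colb;
      Mat B_seq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(&rowb);CHKERRQ(ierr);
      ierr = ISDestroy(&colb);CHKERRQ(ierr);
      ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve
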
4547     Level: developer
4548 
4549 @*/
4550 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4551 {
4552   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4553   PetscErrorCode ierr;
4554   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4555   IS             isrowb,iscolb;
4556   Mat            *bseq=NULL;
4557 
4558   PetscFunctionBegin;
4559   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4560     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4561   }
4562   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4563 
4564   if (scall == MAT_INITIAL_MATRIX) {
4565     start = A->cmap->rstart;
4566     cmap  = a->garray;
4567     nzA   = a->A->cmap->n;
4568     nzB   = a->B->cmap->n;
4569     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4570     ncols = 0;
4571     for (i=0; i<nzB; i++) {  /* row < local row index */
4572       if (cmap[i] < start) idx[ncols++] = cmap[i];
4573       else break;
4574     }
4575     imark = i;
4576     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4577     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4578     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4579     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4580   } else {
4581     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4582     isrowb  = *rowb; iscolb = *colb;
4583     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4584     bseq[0] = *B_seq;
4585   }
4586   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4587   *B_seq = bseq[0];
4588   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4589   if (!rowb) {
4590     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4591   } else {
4592     *rowb = isrowb;
4593   }
4594   if (!colb) {
4595     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4596   } else {
4597     *colb = iscolb;
4598   }
4599   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4600   PetscFunctionReturn(0);
4601 }
4602 
4603 #undef __FUNCT__
4604 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4605 /*
4606     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4607     of the OFF-DIAGONAL portion of local A
4608 
4609     Collective on Mat
4610 
4611    Input Parameters:
4612 +    A,B - the matrices in mpiaij format
4613 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4614 
4615    Output Parameter:
4616 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4617 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4618 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4619 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4620 
4621     Level: developer
4622 
4623 */
4624 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4625 {
4626   VecScatter_MPI_General *gen_to,*gen_from;
4627   PetscErrorCode         ierr;
4628   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4629   Mat_SeqAIJ             *b_oth;
4630   VecScatter             ctx =a->Mvctx;
4631   MPI_Comm               comm;
4632   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4633   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4634   PetscScalar            *rvalues,*svalues;
4635   MatScalar              *b_otha,*bufa,*bufA;
4636   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4637   MPI_Request            *rwaits = NULL,*swaits = NULL;
4638   MPI_Status             *sstatus,rstatus;
4639   PetscMPIInt            jj,size;
4640   PetscInt               *cols,sbs,rbs;
4641   PetscScalar            *vals;
4642 
4643   PetscFunctionBegin;
4644   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4645   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4646 
4647   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4648     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4649   }
4650   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4651   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4652 
4653   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4654   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4655   rvalues  = gen_from->values; /* holds the length of receiving row */
4656   svalues  = gen_to->values;   /* holds the length of sending row */
4657   nrecvs   = gen_from->n;
4658   nsends   = gen_to->n;
4659 
4660   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4661   srow    = gen_to->indices;    /* local row index to be sent */
4662   sstarts = gen_to->starts;
4663   sprocs  = gen_to->procs;
4664   sstatus = gen_to->sstatus;
4665   sbs     = gen_to->bs;
4666   rstarts = gen_from->starts;
4667   rprocs  = gen_from->procs;
4668   rbs     = gen_from->bs;
4669 
4670   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4671   if (scall == MAT_INITIAL_MATRIX) {
4672     /* i-array */
4673     /*---------*/
4674     /*  post receives */
4675     for (i=0; i<nrecvs; i++) {
4676       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4677       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4678       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4679     }
4680 
4681     /* pack the outgoing message */
4682     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4683 
4684     sstartsj[0] = 0;
4685     rstartsj[0] = 0;
4686     len         = 0; /* total length of j or a array to be sent */
4687     k           = 0;
4688     for (i=0; i<nsends; i++) {
4689       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4690       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4691       for (j=0; j<nrows; j++) {
4692         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4693         for (l=0; l<sbs; l++) {
4694           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4695 
4696           rowlen[j*sbs+l] = ncols;
4697 
4698           len += ncols;
4699           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4700         }
4701         k++;
4702       }
4703       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4704 
4705       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4706     }
4707     /* recvs and sends of i-array are completed */
4708     i = nrecvs;
4709     while (i--) {
4710       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4711     }
4712     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4713 
4714     /* allocate buffers for sending j and a arrays */
4715     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4716     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4717 
4718     /* create i-array of B_oth */
4719     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4720 
4721     b_othi[0] = 0;
4722     len       = 0; /* total length of j or a array to be received */
4723     k         = 0;
4724     for (i=0; i<nrecvs; i++) {
4725       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4726       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4727       for (j=0; j<nrows; j++) {
4728         b_othi[k+1] = b_othi[k] + rowlen[j];
4729         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
4730         k++;
4731       }
4732       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4733     }
4734 
4735     /* allocate space for the j and a arrays of B_oth */
4736     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4737     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4738 
4739     /* j-array */
4740     /*---------*/
4741     /*  post receives of j-array */
4742     for (i=0; i<nrecvs; i++) {
4743       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4744       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4745     }
4746 
4747     /* pack the outgoing message j-array */
4748     k = 0;
4749     for (i=0; i<nsends; i++) {
4750       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4751       bufJ  = bufj+sstartsj[i];
4752       for (j=0; j<nrows; j++) {
4753         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4754         for (ll=0; ll<sbs; ll++) {
4755           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4756           for (l=0; l<ncols; l++) {
4757             *bufJ++ = cols[l];
4758           }
4759           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4760         }
4761       }
4762       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4763     }
4764 
4765     /* recvs and sends of j-array are completed */
4766     i = nrecvs;
4767     while (i--) {
4768       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4769     }
4770     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4771   } else if (scall == MAT_REUSE_MATRIX) {
4772     sstartsj = *startsj_s;
4773     rstartsj = *startsj_r;
4774     bufa     = *bufa_ptr;
4775     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4776     b_otha   = b_oth->a;
4777   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Unsupported MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
4778 
4779   /* a-array */
4780   /*---------*/
4781   /*  post receives of a-array */
4782   for (i=0; i<nrecvs; i++) {
4783     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4784     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4785   }
4786 
4787   /* pack the outgoing message a-array */
4788   k = 0;
4789   for (i=0; i<nsends; i++) {
4790     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4791     bufA  = bufa+sstartsj[i];
4792     for (j=0; j<nrows; j++) {
4793       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4794       for (ll=0; ll<sbs; ll++) {
4795         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4796         for (l=0; l<ncols; l++) {
4797           *bufA++ = vals[l];
4798         }
4799         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4800       }
4801     }
4802     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4803   }
4804   /* recvs and sends of a-array are completed */
4805   i = nrecvs;
4806   while (i--) {
4807     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4808   }
4809   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4810   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4811 
4812   if (scall == MAT_INITIAL_MATRIX) {
4813     /* put together the new matrix */
4814     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4815 
4816     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4817     /* Since these are PETSc arrays, change flags to free them as necessary. */
4818     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4819     b_oth->free_a  = PETSC_TRUE;
4820     b_oth->free_ij = PETSC_TRUE;
4821     b_oth->nonew   = 0;
4822 
4823     ierr = PetscFree(bufj);CHKERRQ(ierr);
4824     if (!startsj_s || !bufa_ptr) {
4825       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4826       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4827     } else {
4828       *startsj_s = sstartsj;
4829       *startsj_r = rstartsj;
4830       *bufa_ptr  = bufa;
4831     }
4832   }
4833   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4834   PetscFunctionReturn(0);
4835 }
4836 
4837 #undef __FUNCT__
4838 #define __FUNCT__ "MatGetCommunicationStructs"
4839 /*@C
4840   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4841 
4842   Not Collective
4843 
4844   Input Parameters:
4845 . A - The matrix in mpiaij format
4846 
4847   Output Parameter:
4848 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4849 . colmap - A map from global column index to local index into lvec
4850 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4851 
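  Notes: a minimal sketch of querying these structures (illustrative only). The returned objects
  are owned by the matrix and must not be destroyed by the caller; the type of colmap depends on
  whether PETSc was configured with ctable support:
.vb
      Vec        lvec;
      VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
      PetscTable colmap;
#else
      PetscInt   *colmap;
#endif
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
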
4852   Level: developer
4853 
4854 @*/
4855 #if defined(PETSC_USE_CTABLE)
4856 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4857 #else
4858 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4859 #endif
4860 {
4861   Mat_MPIAIJ *a;
4862 
4863   PetscFunctionBegin;
4864   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4865   PetscValidPointer(lvec, 2);
4866   PetscValidPointer(colmap, 3);
4867   PetscValidPointer(multScatter, 4);
4868   a = (Mat_MPIAIJ*) A->data;
4869   if (lvec) *lvec = a->lvec;
4870   if (colmap) *colmap = a->colmap;
4871   if (multScatter) *multScatter = a->Mvctx;
4872   PetscFunctionReturn(0);
4873 }
4874 
4875 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4876 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4877 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4878 #if defined(PETSC_HAVE_ELEMENTAL)
4879 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4880 #endif
4881 #if defined(PETSC_HAVE_HYPRE)
4882 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
4883 #endif
4884 
4885 #undef __FUNCT__
4886 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4887 /*
4888     Computes C = A*B as (B'*A')' since computing the MPIDense*MPIAIJ product A*B directly is untenable
4889 
4890                n                       p                          p
4891         (              )       (              )         (                  )
4892       m (      A       )  *  n (       B      )   =   m (         C        )
4893         (              )       (              )         (                  )
4894 
4895 */
4896 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4897 {
4898   PetscErrorCode ierr;
4899   Mat            At,Bt,Ct;
4900 
4901   PetscFunctionBegin;
4902   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4903   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4904   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4905   ierr = MatDestroy(&At);CHKERRQ(ierr);
4906   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4907   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4908   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4909   PetscFunctionReturn(0);
4910 }
4911 
4912 #undef __FUNCT__
4913 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4914 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4915 {
4916   PetscErrorCode ierr;
4917   PetscInt       m=A->rmap->n,n=B->cmap->n;
4918   Mat            Cmat;
4919 
4920   PetscFunctionBegin;
4921   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
4922   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
4923   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4924   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
4925   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4926   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
4927   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4928   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4929 
4930   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
4931 
4932   *C = Cmat;
4933   PetscFunctionReturn(0);
4934 }
4935 
4936 /* ----------------------------------------------------------------*/
4937 #undef __FUNCT__
4938 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4939 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4940 {
4941   PetscErrorCode ierr;
4942 
4943   PetscFunctionBegin;
4944   if (scall == MAT_INITIAL_MATRIX) {
4945     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4946     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4947     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4948   }
4949   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4950   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4951   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4952   PetscFunctionReturn(0);
4953 }
4954 
4955 /*MC
4956    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4957 
4958    Options Database Keys:
4959 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
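  Notes: a minimal sketch (illustrative only; m, n, M, N and the preallocation arguments are
  placeholders) of selecting this type explicitly instead of via -mat_type mpiaij:
.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
.ve
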
4960 
4961   Level: beginner
4962 
4963 .seealso: MatCreateAIJ()
4964 M*/
4965 
4966 #undef __FUNCT__
4967 #define __FUNCT__ "MatCreate_MPIAIJ"
4968 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
4969 {
4970   Mat_MPIAIJ     *b;
4971   PetscErrorCode ierr;
4972   PetscMPIInt    size;
4973 
4974   PetscFunctionBegin;
4975   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
4976 
4977   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
4978   B->data       = (void*)b;
4979   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4980   B->assembled  = PETSC_FALSE;
4981   B->insertmode = NOT_SET_VALUES;
4982   b->size       = size;
4983 
4984   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
4985 
4986   /* build cache for off array entries formed */
4987   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
4988 
4989   b->donotstash  = PETSC_FALSE;
4990   b->colmap      = 0;
4991   b->garray      = 0;
4992   b->roworiented = PETSC_TRUE;
4993 
4994   /* stuff used for matrix vector multiply */
4995   b->lvec  = NULL;
4996   b->Mvctx = NULL;
4997 
4998   /* stuff for MatGetRow() */
4999   b->rowindices   = 0;
5000   b->rowvalues    = 0;
5001   b->getrowactive = PETSC_FALSE;
5002 
5003   /* flexible pointer used in CUSP/CUSPARSE classes */
5004   b->spptr = NULL;
5005 
5006   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5007   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5008   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5009   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5010   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5011   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5012   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5013   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5014   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5015   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5016 #if defined(PETSC_HAVE_ELEMENTAL)
5017   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5018 #endif
5019 #if defined(PETSC_HAVE_HYPRE)
5020   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5021 #endif
5022   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5023   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5024   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5025   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5026   PetscFunctionReturn(0);
5027 }
5028 
5029 #undef __FUNCT__
5030 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5031 /*@C
5032      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5033          and "off-diagonal" parts of the matrix in CSR format.
5034 
5035    Collective on MPI_Comm
5036 
5037    Input Parameters:
5038 +  comm - MPI communicator
5039 .  m - number of local rows (Cannot be PETSC_DECIDE)
5040 .  n - This value should be the same as the local size used in creating the
5041        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5042        it calculated if N is given). For square matrices n is almost always m.
5043 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5044 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5045 .   i - row indices for "diagonal" portion of matrix
5046 .   j - column indices
5047 .   a - matrix values
5048 .   oi - row indices for "off-diagonal" portion of matrix
5049 .   oj - column indices
5050 -   oa - matrix values
5051 
5052    Output Parameter:
5053 .   mat - the matrix
5054 
5055    Level: advanced
5056 
5057    Notes:
5058        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5059        must free the arrays once the matrix has been destroyed and not before.
5060 
5061        The i and j indices are 0 based
5062 
5063        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5064 
5065        This sets local rows and cannot be used to set off-processor values.
5066 
5067        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5068        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5069        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5070        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5071        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5072        communication if it is known that only local entries will be set.
5073 
5074 .keywords: matrix, aij, compressed row, sparse, parallel
5075 
5076 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5077           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5078 @*/
5079 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5080 {
5081   PetscErrorCode ierr;
5082   Mat_MPIAIJ     *maij;
5083 
5084   PetscFunctionBegin;
5085   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5086   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5087   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5088   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5089   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5090   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5091   maij = (Mat_MPIAIJ*) (*mat)->data;
5092 
5093   (*mat)->preallocated = PETSC_TRUE;
5094 
5095   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5096   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5097 
5098   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5099   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5100 
5101   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5102   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5103   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5104   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5105 
5106   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5107   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5108   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5109   PetscFunctionReturn(0);
5110 }
5111 
5112 /*
5113     Special version for direct calls from Fortran
5114 */
5115 #include <petsc/private/fortranimpl.h>
5116 
5117 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5118 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5119 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5120 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5121 #endif
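/* Depending on the compiler's Fortran name-mangling convention, the symbol referenced from Fortran is
   MATSETVALUESMPIAIJ (all upper case), matsetvaluesmpiaij (no underscore), or matsetvaluesmpiaij_
   (trailing underscore, the spelling used in the definition below). */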
5122 
5123 /* Change these macros so they can be used in a void function (which cannot return an error code) */
5124 #undef CHKERRQ
5125 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5126 #undef SETERRQ2
5127 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5128 #undef SETERRQ3
5129 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5130 #undef SETERRQ
5131 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5132 
5133 #undef __FUNCT__
5134 #define __FUNCT__ "matsetvaluesmpiaij_"
5135 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5136 {
5137   Mat            mat  = *mmat;
5138   PetscInt       m    = *mm, n = *mn;
5139   InsertMode     addv = *maddv;
5140   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5141   PetscScalar    value;
5142   PetscErrorCode ierr;
5143 
5144   MatCheckPreallocated(mat,1);
5145   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5146 
5147 #if defined(PETSC_USE_DEBUG)
5148   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5149 #endif
5150   {
5151     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5152     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5153     PetscBool roworiented = aij->roworiented;
5154 
5155     /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5156     Mat        A                 = aij->A;
5157     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5158     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5159     MatScalar  *aa               = a->a;
5160     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5161     Mat        B                 = aij->B;
5162     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5163     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5164     MatScalar  *ba               = b->a;
5165 
5166     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5167     PetscInt  nonew = a->nonew;
5168     MatScalar *ap1,*ap2;
5169 
5170     PetscFunctionBegin;
5171     for (i=0; i<m; i++) {
5172       if (im[i] < 0) continue;
5173 #if defined(PETSC_USE_DEBUG)
5174       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5175 #endif
5176       if (im[i] >= rstart && im[i] < rend) { /* this row is owned by this process */
5177         row      = im[i] - rstart;
5178         lastcol1 = -1;
5179         rp1      = aj + ai[row];
5180         ap1      = aa + ai[row];
5181         rmax1    = aimax[row];
5182         nrow1    = ailen[row];
5183         low1     = 0;
5184         high1    = nrow1;
5185         lastcol2 = -1;
5186         rp2      = bj + bi[row];
5187         ap2      = ba + bi[row];
5188         rmax2    = bimax[row];
5189         nrow2    = bilen[row];
5190         low2     = 0;
5191         high2    = nrow2;
5192 
5193         for (j=0; j<n; j++) {
5194           if (roworiented) value = v[i*n+j];
5195           else value = v[i+j*m];
5196           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5197           if (in[j] >= cstart && in[j] < cend) { /* column lies in the "diagonal" block A */
5198             col = in[j] - cstart;
5199             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5200           } else if (in[j] < 0) continue;
5201 #if defined(PETSC_USE_DEBUG)
5202           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5203 #endif
5204           else { /* column lies in the "off-diagonal" block B */
5205             if (mat->was_assembled) {
5206               if (!aij->colmap) {
5207                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5208               }
5209 #if defined(PETSC_USE_CTABLE)
5210               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5211               col--;
5212 #else
5213               col = aij->colmap[in[j]] - 1;
5214 #endif
5215               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { /* column not already present in B and new nonzero locations are allowed */
5216                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5217                 col  =  in[j];
5218                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5219                 B     = aij->B;
5220                 b     = (Mat_SeqAIJ*)B->data;
5221                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5222                 rp2   = bj + bi[row];
5223                 ap2   = ba + bi[row];
5224                 rmax2 = bimax[row];
5225                 nrow2 = bilen[row];
5226                 low2  = 0;
5227                 high2 = nrow2;
5228                 bm    = aij->B->rmap->n;
5229                 ba    = b->a;
5230               }
5231             } else col = in[j];
5232             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5233           }
5234         }
5235       } else if (!aij->donotstash) { /* off-process row: stash the values for communication during assembly */
5236         if (roworiented) {
5237           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5238         } else {
5239           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5240         }
5241       }
5242     }
5243   }
5244   PetscFunctionReturnVoid();
5245 }
5246 
5247