xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision fb694a9e8029b89c4e426154d038aa2dbdb99bcc)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL. The type also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
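/*
   A minimal usage sketch of the recommendation above (illustrative only; comm, m, n, M, N
   and the preallocation counts are placeholder values, not taken from this file):

     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATAIJ);
     MatSetFromOptions(A);
     MatSeqAIJSetPreallocation(A,5,NULL);            used for a single process communicator
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);     used for a communicator with several processes

   Calling both preallocation routines is safe; the one that does not match the actual
   matrix type is ignored.
*/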
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
48 {
49   PetscErrorCode ierr;
50   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
51 
52   PetscFunctionBegin;
53   if (mat->A) {
54     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
55     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
61 {
62   PetscErrorCode  ierr;
63   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
64   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
65   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
66   const PetscInt  *ia,*ib;
67   const MatScalar *aa,*bb;
68   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
69   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
70 
71   PetscFunctionBegin;
72   *keptrows = 0;
73   ia        = a->i;
74   ib        = b->i;
75   for (i=0; i<m; i++) {
76     na = ia[i+1] - ia[i];
77     nb = ib[i+1] - ib[i];
78     if (!na && !nb) {
79       cnt++;
80       goto ok1;
81     }
82     aa = a->a + ia[i];
83     for (j=0; j<na; j++) {
84       if (aa[j] != 0.0) goto ok1;
85     }
86     bb = b->a + ib[i];
87     for (j=0; j <nb; j++) {
88       if (bb[j] != 0.0) goto ok1;
89     }
90     cnt++;
91 ok1:;
92   }
93   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
94   if (!n0rows) PetscFunctionReturn(0);
95   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
96   cnt  = 0;
97   for (i=0; i<m; i++) {
98     na = ia[i+1] - ia[i];
99     nb = ib[i+1] - ib[i];
100     if (!na && !nb) continue;
101     aa = a->a + ia[i];
102     for (j=0; j<na;j++) {
103       if (aa[j] != 0.0) {
104         rows[cnt++] = rstart + i;
105         goto ok2;
106       }
107     }
108     bb = b->a + ib[i];
109     for (j=0; j<nb; j++) {
110       if (bb[j] != 0.0) {
111         rows[cnt++] = rstart + i;
112         goto ok2;
113       }
114     }
115 ok2:;
116   }
117   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
118   PetscFunctionReturn(0);
119 }
120 
121 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
122 {
123   PetscErrorCode    ierr;
124   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
125   PetscBool         cong;
126 
127   PetscFunctionBegin;
128   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
129   if (Y->assembled && cong) {
130     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
131   } else {
132     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
133   }
134   PetscFunctionReturn(0);
135 }
136 
137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
138 {
139   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
140   PetscErrorCode ierr;
141   PetscInt       i,rstart,nrows,*rows;
142 
143   PetscFunctionBegin;
144   *zrows = NULL;
145   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
146   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
147   for (i=0; i<nrows; i++) rows[i] += rstart;
148   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
153 {
154   PetscErrorCode ierr;
155   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
156   PetscInt       i,n,*garray = aij->garray;
157   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
158   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
159   PetscReal      *work;
160 
161   PetscFunctionBegin;
162   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
163   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
164   if (type == NORM_2) {
165     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
166       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
167     }
168     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
169       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
170     }
171   } else if (type == NORM_1) {
172     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
173       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
174     }
175     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
176       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
177     }
178   } else if (type == NORM_INFINITY) {
179     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
180       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
181     }
182     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
183       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
184     }
185 
186   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
187   if (type == NORM_INFINITY) {
188     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
189   } else {
190     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
191   }
192   ierr = PetscFree(work);CHKERRQ(ierr);
193   if (type == NORM_2) {
194     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
195   }
196   PetscFunctionReturn(0);
197 }
198 
199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
200 {
201   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
202   IS              sis,gis;
203   PetscErrorCode  ierr;
204   const PetscInt  *isis,*igis;
205   PetscInt        n,*iis,nsis,ngis,rstart,i;
206 
207   PetscFunctionBegin;
208   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
209   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
210   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
211   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
212   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
213   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
214 
215   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
216   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
217   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
218   n    = ngis + nsis;
219   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
220   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
221   for (i=0; i<n; i++) iis[i] += rstart;
222   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
223 
224   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
225   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
226   ierr = ISDestroy(&sis);CHKERRQ(ierr);
227   ierr = ISDestroy(&gis);CHKERRQ(ierr);
228   PetscFunctionReturn(0);
229 }
230 
231 /*
232     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
233     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
234 
235     Only for square matrices
236 
237     Used by a preconditioner, hence PETSC_EXTERN
238 */
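/*
   Sketch of the intended call pattern (dmat, gmat, m are illustrative names, not taken
   from this file):

     Mat dmat;
     MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);

   where gmat is a square MATSEQAIJ matrix that is significant only on rank 0 and m is the
   number of rows this process is to own; calling again with MAT_REUSE_MATRIX and the same
   dmat moves over only the numerical values.
*/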
239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
240 {
241   PetscMPIInt    rank,size;
242   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
243   PetscErrorCode ierr;
244   Mat            mat;
245   Mat_SeqAIJ     *gmata;
246   PetscMPIInt    tag;
247   MPI_Status     status;
248   PetscBool      aij;
249   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
250 
251   PetscFunctionBegin;
252   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
253   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
254   if (!rank) {
255     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
256     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
257   }
258   if (reuse == MAT_INITIAL_MATRIX) {
259     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
260     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
261     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
262     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
263     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
264     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
265     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
266     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
267     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
268 
269     rowners[0] = 0;
270     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
271     rstart = rowners[rank];
272     rend   = rowners[rank+1];
273     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
274     if (!rank) {
275       gmata = (Mat_SeqAIJ*) gmat->data;
276       /* send row lengths to all processors */
277       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
278       for (i=1; i<size; i++) {
279         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
280       }
281       /* determine the diagonal and off-diagonal counts for each row */
282       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
283       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
284       jj   = 0;
285       for (i=0; i<m; i++) {
286         for (j=0; j<dlens[i]; j++) {
287           if (gmata->j[jj] < rstart) ld[i]++;
288           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
289           jj++;
290         }
291       }
292       /* send column indices to other processes */
293       for (i=1; i<size; i++) {
294         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
295         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
296         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
297       }
298 
299       /* send numerical values to other processes */
300       for (i=1; i<size; i++) {
301         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
302         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
303       }
304       gmataa = gmata->a;
305       gmataj = gmata->j;
306 
307     } else {
308       /* receive row lengths */
309       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* receive column indices */
311       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
312       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
313       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
314       /* determine the diagonal and off-diagonal counts for each row */
315       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
316       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
317       jj   = 0;
318       for (i=0; i<m; i++) {
319         for (j=0; j<dlens[i]; j++) {
320           if (gmataj[jj] < rstart) ld[i]++;
321           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
322           jj++;
323         }
324       }
325       /* receive numerical values */
326       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
327       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
328     }
329     /* set preallocation */
330     for (i=0; i<m; i++) {
331       dlens[i] -= olens[i];
332     }
333     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
334     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
335 
336     for (i=0; i<m; i++) {
337       dlens[i] += olens[i];
338     }
339     cnt = 0;
340     for (i=0; i<m; i++) {
341       row  = rstart + i;
342       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
343       cnt += dlens[i];
344     }
345     if (rank) {
346       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
347     }
348     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
349     ierr = PetscFree(rowners);CHKERRQ(ierr);
350 
351     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
352 
353     *inmat = mat;
354   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
355     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
356     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
357     mat  = *inmat;
358     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
359     if (!rank) {
360       /* send numerical values to other processes */
361       gmata  = (Mat_SeqAIJ*) gmat->data;
362       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
363       gmataa = gmata->a;
364       for (i=1; i<size; i++) {
365         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
366         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
367       }
368       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
369     } else {
370       /* receive numerical values from process 0 */
371       nz   = Ad->nz + Ao->nz;
372       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
373       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
374     }
375     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
376     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
377     ad = Ad->a;
378     ao = Ao->a;
379     if (mat->rmap->n) {
380       i  = 0;
381       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     for (i=1; i<mat->rmap->n; i++) {
385       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
386       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
387     }
388     i--;
389     if (mat->rmap->n) {
390       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
391     }
392     if (rank) {
393       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
394     }
395   }
396   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
398   PetscFunctionReturn(0);
399 }
400 
401 /*
402   Local utility routine that creates a mapping from the global column
403 number to the local number in the off-diagonal part of the local
404 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at a
405 slightly higher hash table cost; without it, it is not scalable (each processor
406 has an order-N integer array) but access is fast.
407 */
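/*
   Lookup sketch, mirroring the use in MatSetValues_MPIAIJ() below (gcol and lcol are
   illustrative names): given a global column number gcol,

   #if defined(PETSC_USE_CTABLE)
     PetscTableFind(aij->colmap,gcol+1,&lcol); lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif

   leaves in lcol the local column number within the off-diagonal block B, with lcol < 0
   meaning that global column does not (yet) occur in B on this process.
*/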
408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       n = aij->B->cmap->n,i;
413 
414   PetscFunctionBegin;
415   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
416 #if defined(PETSC_USE_CTABLE)
417   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
418   for (i=0; i<n; i++) {
419     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
420   }
421 #else
422   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
423   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
424   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
425 #endif
426   PetscFunctionReturn(0);
427 }
428 
429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
430 { \
431     if (col <= lastcol1)  low1 = 0;     \
432     else                 high1 = nrow1; \
433     lastcol1 = col;\
434     while (high1-low1 > 5) { \
435       t = (low1+high1)/2; \
436       if (rp1[t] > col) high1 = t; \
437       else              low1  = t; \
438     } \
439       for (_i=low1; _i<high1; _i++) { \
440         if (rp1[_i] > col) break; \
441         if (rp1[_i] == col) { \
442           if (addv == ADD_VALUES) ap1[_i] += value;   \
443           else                    ap1[_i] = value; \
444           goto a_noinsert; \
445         } \
446       }  \
447       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
448       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
449       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
450       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
451       N = nrow1++ - 1; a->nz++; high1++; \
452       /* shift up all the later entries in this row */ \
453       for (ii=N; ii>=_i; ii--) { \
454         rp1[ii+1] = rp1[ii]; \
455         ap1[ii+1] = ap1[ii]; \
456       } \
457       rp1[_i] = col;  \
458       ap1[_i] = value;  \
459       A->nonzerostate++;\
460       a_noinsert: ; \
461       ailen[row] = nrow1; \
462 }
463 
464 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
465   { \
466     if (col <= lastcol2) low2 = 0;                        \
467     else high2 = nrow2;                                   \
468     lastcol2 = col;                                       \
469     while (high2-low2 > 5) {                              \
470       t = (low2+high2)/2;                                 \
471       if (rp2[t] > col) high2 = t;                        \
472       else             low2  = t;                         \
473     }                                                     \
474     for (_i=low2; _i<high2; _i++) {                       \
475       if (rp2[_i] > col) break;                           \
476       if (rp2[_i] == col) {                               \
477         if (addv == ADD_VALUES) ap2[_i] += value;         \
478         else                    ap2[_i] = value;          \
479         goto b_noinsert;                                  \
480       }                                                   \
481     }                                                     \
482     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
483     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
484     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
485     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
486     N = nrow2++ - 1; b->nz++; high2++;                    \
487     /* shift up all the later entries in this row */      \
488     for (ii=N; ii>=_i; ii--) {                            \
489       rp2[ii+1] = rp2[ii];                                \
490       ap2[ii+1] = ap2[ii];                                \
491     }                                                     \
492     rp2[_i] = col;                                        \
493     ap2[_i] = value;                                      \
494     B->nonzerostate++;                                    \
495     b_noinsert: ;                                         \
496     bilen[row] = nrow2;                                   \
497   }
498 
499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
500 {
501   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
502   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
503   PetscErrorCode ierr;
504   PetscInt       l,*garray = mat->garray,diag;
505 
506   PetscFunctionBegin;
507   /* code only works for square matrices A */
508 
509   /* find size of row to the left of the diagonal part */
510   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
511   row  = row - diag;
512   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
513     if (garray[b->j[b->i[row]+l]] > diag) break;
514   }
515   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* diagonal part */
518   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
519 
520   /* right of diagonal part */
521   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
522   PetscFunctionReturn(0);
523 }
524 
525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
526 {
527   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
528   PetscScalar    value;
529   PetscErrorCode ierr;
530   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
531   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
532   PetscBool      roworiented = aij->roworiented;
533 
534   /* Some Variables required in the macro */
535   Mat        A                 = aij->A;
536   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
537   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
538   MatScalar  *aa               = a->a;
539   PetscBool  ignorezeroentries = a->ignorezeroentries;
540   Mat        B                 = aij->B;
541   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
542   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
543   MatScalar  *ba               = b->a;
544 
545   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
546   PetscInt  nonew;
547   MatScalar *ap1,*ap2;
548 
549   PetscFunctionBegin;
550   for (i=0; i<m; i++) {
551     if (im[i] < 0) continue;
552 #if defined(PETSC_USE_DEBUG)
553     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
554 #endif
555     if (im[i] >= rstart && im[i] < rend) {
556       row      = im[i] - rstart;
557       lastcol1 = -1;
558       rp1      = aj + ai[row];
559       ap1      = aa + ai[row];
560       rmax1    = aimax[row];
561       nrow1    = ailen[row];
562       low1     = 0;
563       high1    = nrow1;
564       lastcol2 = -1;
565       rp2      = bj + bi[row];
566       ap2      = ba + bi[row];
567       rmax2    = bimax[row];
568       nrow2    = bilen[row];
569       low2     = 0;
570       high2    = nrow2;
571 
572       for (j=0; j<n; j++) {
573         if (roworiented) value = v[i*n+j];
574         else             value = v[i+j*m];
575         if (in[j] >= cstart && in[j] < cend) {
576           col   = in[j] - cstart;
577           nonew = a->nonew;
578           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
579           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
580         } else if (in[j] < 0) continue;
581 #if defined(PETSC_USE_DEBUG)
582         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
583 #endif
584         else {
585           if (mat->was_assembled) {
586             if (!aij->colmap) {
587               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
588             }
589 #if defined(PETSC_USE_CTABLE)
590             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
591             col--;
592 #else
593             col = aij->colmap[in[j]] - 1;
594 #endif
595             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
596               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
597               col  =  in[j];
598               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
599               B     = aij->B;
600               b     = (Mat_SeqAIJ*)B->data;
601               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
602               rp2   = bj + bi[row];
603               ap2   = ba + bi[row];
604               rmax2 = bimax[row];
605               nrow2 = bilen[row];
606               low2  = 0;
607               high2 = nrow2;
608               bm    = aij->B->rmap->n;
609               ba    = b->a;
610             } else if (col < 0) {
611               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
612                 ierr = PetscInfo3(mat,"Skipping insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
613               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614             }
615           } else col = in[j];
616           nonew = b->nonew;
617           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
618         }
619       }
620     } else {
621       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
622       if (!aij->donotstash) {
623         mat->assembled = PETSC_FALSE;
624         if (roworiented) {
625           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
626         } else {
627           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
628         }
629       }
630     }
631   }
632   PetscFunctionReturn(0);
633 }
634 
635 /*
636     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
637     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
638     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
639 */
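/*
   Illustrative input (not taken from a particular caller): for two local rows whose global
   entries are row 0 -> columns 3 and 7, row 1 -> column 4, with cstart = 3 and cend = 6,

     mat_i = {0, 2, 3}     CSR row pointers
     mat_j = {3, 7, 4}     global column indices, sorted within each row

   columns 3 and 4 fall in [cstart,cend) and are stored in the diagonal block (shifted by
   cstart); column 7 is stored in the off-diagonal block with its global index.
*/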
640 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
641 {
642   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
643   Mat            A           = aij->A; /* diagonal part of the matrix */
644   Mat            B           = aij->B; /* offdiagonal part of the matrix */
645   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
646   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
647   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
648   PetscInt       *ailen      = a->ilen,*aj = a->j;
649   PetscInt       *bilen      = b->ilen,*bj = b->j;
650   PetscInt       am          = aij->A->rmap->n,j;
651   PetscInt       diag_so_far = 0,dnz;
652   PetscInt       offd_so_far = 0,onz;
653 
654   PetscFunctionBegin;
655   /* Iterate over all rows of the matrix */
656   for (j=0; j<am; j++) {
657     dnz = onz = 0;
658     /*  Iterate over all non-zero columns of the current row */
659     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
660       /* If column is in the diagonal */
661       if (mat_j[col] >= cstart && mat_j[col] < cend) {
662         aj[diag_so_far++] = mat_j[col] - cstart;
663         dnz++;
664       } else { /* off-diagonal entries */
665         bj[offd_so_far++] = mat_j[col];
666         onz++;
667       }
668     }
669     ailen[j] = dnz;
670     bilen[j] = onz;
671   }
672   PetscFunctionReturn(0);
673 }
674 
675 /*
676     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
677     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
678     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
679     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
680     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
681 */
682 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
683 {
684   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
685   Mat            A      = aij->A; /* diagonal part of the matrix */
686   Mat            B      = aij->B; /* offdiagonal part of the matrix */
687   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
688   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
689   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
690   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
691   PetscInt       *ailen = a->ilen,*aj = a->j;
692   PetscInt       *bilen = b->ilen,*bj = b->j;
693   PetscInt       am     = aij->A->rmap->n,j;
694   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
695   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
696   PetscScalar    *aa = a->a,*ba = b->a;
697 
698   PetscFunctionBegin;
699   /* Iterate over all rows of the matrix */
700   for (j=0; j<am; j++) {
701     dnz_row = onz_row = 0;
702     rowstart_offd = full_offd_i[j];
703     rowstart_diag = full_diag_i[j];
704     /*  Iterate over all non-zero columns of the current row */
705     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
706       /* If column is in the diagonal */
707       if (mat_j[col] >= cstart && mat_j[col] < cend) {
708         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
709         aa[rowstart_diag+dnz_row] = mat_a[col];
710         dnz_row++;
711       } else { /* off-diagonal entries */
712         bj[rowstart_offd+onz_row] = mat_j[col];
713         ba[rowstart_offd+onz_row] = mat_a[col];
714         onz_row++;
715       }
716     }
717     ailen[j] = dnz_row;
718     bilen[j] = onz_row;
719   }
720   PetscFunctionReturn(0);
721 }
722 
723 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
724 {
725   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
726   PetscErrorCode ierr;
727   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
728   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
729 
730   PetscFunctionBegin;
731   for (i=0; i<m; i++) {
732     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
733     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
734     if (idxm[i] >= rstart && idxm[i] < rend) {
735       row = idxm[i] - rstart;
736       for (j=0; j<n; j++) {
737         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
738         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
739         if (idxn[j] >= cstart && idxn[j] < cend) {
740           col  = idxn[j] - cstart;
741           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
742         } else {
743           if (!aij->colmap) {
744             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
745           }
746 #if defined(PETSC_USE_CTABLE)
747           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
748           col--;
749 #else
750           col = aij->colmap[idxn[j]] - 1;
751 #endif
752           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
753           else {
754             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
755           }
756         }
757       }
758     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
759   }
760   PetscFunctionReturn(0);
761 }
762 
763 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
764 
765 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
766 {
767   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
768   PetscErrorCode ierr;
769   PetscInt       nstash,reallocs;
770 
771   PetscFunctionBegin;
772   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
773 
774   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
775   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
776   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
777   PetscFunctionReturn(0);
778 }
779 
780 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
781 {
782   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
783   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
784   PetscErrorCode ierr;
785   PetscMPIInt    n;
786   PetscInt       i,j,rstart,ncols,flg;
787   PetscInt       *row,*col;
788   PetscBool      other_disassembled;
789   PetscScalar    *val;
790 
791   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
792 
793   PetscFunctionBegin;
794   if (!aij->donotstash && !mat->nooffprocentries) {
795     while (1) {
796       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
797       if (!flg) break;
798 
799       for (i=0; i<n; ) {
800         /* Now identify the consecutive vals belonging to the same row */
801         for (j=i,rstart=row[j]; j<n; j++) {
802           if (row[j] != rstart) break;
803         }
804         if (j < n) ncols = j-i;
805         else       ncols = n-i;
806         /* Now assemble all these values with a single function call */
807         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
808 
809         i = j;
810       }
811     }
812     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
813   }
814   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
815   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
816 
817   /* determine if any processor has disassembled; if so, we must
818      also disassemble ourselves so that we may reassemble. */
819   /*
820      if the nonzero structure of submatrix B cannot change then we know that
821      no processor disassembled, and thus we can skip this stuff
822   */
823   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
824     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
825     if (mat->was_assembled && !other_disassembled) {
826       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
827     }
828   }
829   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
830     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
831   }
832   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
833   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
834   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
835 
836   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
837 
838   aij->rowvalues = 0;
839 
840   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
841   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
842 
843   /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
844   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
845     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
846     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
847   }
848   PetscFunctionReturn(0);
849 }
850 
851 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
852 {
853   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
854   PetscErrorCode ierr;
855 
856   PetscFunctionBegin;
857   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
858   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
859   PetscFunctionReturn(0);
860 }
861 
862 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
863 {
864   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
865   PetscInt      *lrows;
866   PetscInt       r, len;
867   PetscBool      cong;
868   PetscErrorCode ierr;
869 
870   PetscFunctionBegin;
871   /* get locally owned rows */
872   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
873   /* fix right hand side if needed */
874   if (x && b) {
875     const PetscScalar *xx;
876     PetscScalar       *bb;
877 
878     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
879     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
880     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
881     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
882     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
883   }
884   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
885   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
886   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
887   if ((diag != 0.0) && cong) {
888     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
889   } else if (diag != 0.0) {
890     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
891     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
892     for (r = 0; r < len; ++r) {
893       const PetscInt row = lrows[r] + A->rmap->rstart;
894       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
895     }
896     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
897     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
898   } else {
899     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
900   }
901   ierr = PetscFree(lrows);CHKERRQ(ierr);
902 
903   /* only change matrix nonzero state if pattern was allowed to be changed */
904   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
905     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
906     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
907   }
908   PetscFunctionReturn(0);
909 }
910 
911 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
912 {
913   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
914   PetscErrorCode    ierr;
915   PetscMPIInt       n = A->rmap->n;
916   PetscInt          i,j,r,m,p = 0,len = 0;
917   PetscInt          *lrows,*owners = A->rmap->range;
918   PetscSFNode       *rrows;
919   PetscSF           sf;
920   const PetscScalar *xx;
921   PetscScalar       *bb,*mask;
922   Vec               xmask,lmask;
923   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
924   const PetscInt    *aj, *ii,*ridx;
925   PetscScalar       *aa;
926 
927   PetscFunctionBegin;
928   /* Create SF where leaves are input rows and roots are owned rows */
929   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
930   for (r = 0; r < n; ++r) lrows[r] = -1;
931   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
932   for (r = 0; r < N; ++r) {
933     const PetscInt idx   = rows[r];
934     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
935     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
936       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
937     }
938     rrows[r].rank  = p;
939     rrows[r].index = rows[r] - owners[p];
940   }
941   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
942   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
943   /* Collect flags for rows to be zeroed */
944   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
945   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
946   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
947   /* Compress and put in row numbers */
948   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
949   /* zero diagonal part of matrix */
950   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
951   /* handle off diagonal part of matrix */
952   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
953   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
954   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
955   for (i=0; i<len; i++) bb[lrows[i]] = 1;
956   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
957   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
958   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
959   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
960   if (x) {
961     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
962     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
963     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
964     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
965   }
966   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
967   /* remove zeroed rows of off diagonal matrix */
968   ii = aij->i;
969   for (i=0; i<len; i++) {
970     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
971   }
972   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
973   if (aij->compressedrow.use) {
974     m    = aij->compressedrow.nrows;
975     ii   = aij->compressedrow.i;
976     ridx = aij->compressedrow.rindex;
977     for (i=0; i<m; i++) {
978       n  = ii[i+1] - ii[i];
979       aj = aij->j + ii[i];
980       aa = aij->a + ii[i];
981 
982       for (j=0; j<n; j++) {
983         if (PetscAbsScalar(mask[*aj])) {
984           if (b) bb[*ridx] -= *aa*xx[*aj];
985           *aa = 0.0;
986         }
987         aa++;
988         aj++;
989       }
990       ridx++;
991     }
992   } else { /* do not use compressed row format */
993     m = l->B->rmap->n;
994     for (i=0; i<m; i++) {
995       n  = ii[i+1] - ii[i];
996       aj = aij->j + ii[i];
997       aa = aij->a + ii[i];
998       for (j=0; j<n; j++) {
999         if (PetscAbsScalar(mask[*aj])) {
1000           if (b) bb[i] -= *aa*xx[*aj];
1001           *aa = 0.0;
1002         }
1003         aa++;
1004         aj++;
1005       }
1006     }
1007   }
1008   if (x) {
1009     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1010     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1011   }
1012   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1013   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1014   ierr = PetscFree(lrows);CHKERRQ(ierr);
1015 
1016   /* only change matrix nonzero state if pattern was allowed to be changed */
1017   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1018     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1019     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1020   }
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1025 {
1026   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1027   PetscErrorCode ierr;
1028   PetscInt       nt;
1029   VecScatter     Mvctx = a->Mvctx;
1030 
1031   PetscFunctionBegin;
1032   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1033   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1034 
1035   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1037   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1038   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1043 {
1044   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1045   PetscErrorCode ierr;
1046 
1047   PetscFunctionBegin;
1048   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1053 {
1054   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1055   PetscErrorCode ierr;
1056   VecScatter     Mvctx = a->Mvctx;
1057 
1058   PetscFunctionBegin;
1059   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1060   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1061   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1062   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1063   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1068 {
1069   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1070   PetscErrorCode ierr;
1071   PetscBool      merged;
1072 
1073   PetscFunctionBegin;
1074   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1075   /* do nondiagonal part */
1076   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1077   if (!merged) {
1078     /* send it on its way */
1079     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1080     /* do local part */
1081     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1082     /* receive remote parts: note this assumes the values are not actually */
1083     /* added into yy until the next line */
1084     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1085   } else {
1086     /* do local part */
1087     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1088     /* send it on its way */
1089     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1090     /* values actually were received in the Begin() but we need to call this nop */
1091     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1092   }
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1097 {
1098   MPI_Comm       comm;
1099   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1100   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1101   IS             Me,Notme;
1102   PetscErrorCode ierr;
1103   PetscInt       M,N,first,last,*notme,i;
1104   PetscBool      lf;
1105   PetscMPIInt    size;
1106 
1107   PetscFunctionBegin;
1108   /* Easy test: symmetric diagonal block */
1109   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1110   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1111   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1112   if (!*f) PetscFunctionReturn(0);
1113   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1114   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1115   if (size == 1) PetscFunctionReturn(0);
1116 
1117   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1118   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1119   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1120   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1121   for (i=0; i<first; i++) notme[i] = i;
1122   for (i=last; i<M; i++) notme[i-last+first] = i;
1123   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1124   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1125   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1126   Aoff = Aoffs[0];
1127   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1128   Boff = Boffs[0];
1129   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1130   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1131   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1132   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1133   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1134   ierr = PetscFree(notme);CHKERRQ(ierr);
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1139 {
1140   PetscErrorCode ierr;
1141 
1142   PetscFunctionBegin;
1143   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151 
1152   PetscFunctionBegin;
1153   /* do nondiagonal part */
1154   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1155   /* send it on its way */
1156   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1157   /* do local part */
1158   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1159   /* receive remote parts */
1160   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 /*
1165   This only works correctly for square matrices where the subblock A->A is the
1166    diagonal block
1167 */
1168 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1169 {
1170   PetscErrorCode ierr;
1171   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1172 
1173   PetscFunctionBegin;
1174   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1175   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1176   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1181 {
1182   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1183   PetscErrorCode ierr;
1184 
1185   PetscFunctionBegin;
1186   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1187   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1192 {
1193   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1194   PetscErrorCode ierr;
1195 
1196   PetscFunctionBegin;
1197 #if defined(PETSC_USE_LOG)
1198   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1199 #endif
1200   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1201   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1202   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1203   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1204 #if defined(PETSC_USE_CTABLE)
1205   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1206 #else
1207   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1208 #endif
1209   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1210   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1211   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1212   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1213   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1214   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1215   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1216 
1217   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1225   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1226 #if defined(PETSC_HAVE_ELEMENTAL)
1227   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1228 #endif
1229 #if defined(PETSC_HAVE_HYPRE)
1230   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1231   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1232 #endif
1233   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1234   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1239 {
1240   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1241   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1242   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1243   PetscErrorCode ierr;
1244   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1245   int            fd;
1246   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1247   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1248   PetscScalar    *column_values;
1249   PetscInt       message_count,flowcontrolcount;
1250   FILE           *file;
1251 
1252   PetscFunctionBegin;
1253   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1254   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1255   nz   = A->nz + B->nz;
1256   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1257   if (!rank) {
1258     header[0] = MAT_FILE_CLASSID;
1259     header[1] = mat->rmap->N;
1260     header[2] = mat->cmap->N;
1261 
1262     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1263     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1264     /* get largest number of rows any processor has */
1265     rlen  = mat->rmap->n;
1266     range = mat->rmap->range;
1267     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1268   } else {
1269     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1270     rlen = mat->rmap->n;
1271   }
1272 
1273   /* load up the local row counts */
1274   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1275   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1276 
1277   /* store the row lengths to the file */
1278   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1279   if (!rank) {
1280     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1281     for (i=1; i<size; i++) {
1282       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1283       rlen = range[i+1] - range[i];
1284       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1285       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1286     }
1287     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1288   } else {
1289     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1290     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1291     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1292   }
1293   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1294 
1295   /* load up the local column indices */
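  /* For each local row: first the off-diagonal (B) entries whose global column lies to the left of the
     diagonal block, then the diagonal (A) entries shifted by cstart, then the remaining B entries, so
     the global column indices of every row are emitted in increasing order */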
1296   nzmax = nz; /* the root process needs enough space for the largest per-process nonzero count (set by the reduction below); every other process only needs room for its own entries */
1297   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1298   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1299   cnt   = 0;
1300   for (i=0; i<mat->rmap->n; i++) {
1301     for (j=B->i[i]; j<B->i[i+1]; j++) {
1302       if ((col = garray[B->j[j]]) > cstart) break;
1303       column_indices[cnt++] = col;
1304     }
1305     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1306     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1307   }
1308   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1309 
1310   /* store the column indices to the file */
1311   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1312   if (!rank) {
1313     MPI_Status status;
1314     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1315     for (i=1; i<size; i++) {
1316       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1317       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1318       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1319       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1320       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1321     }
1322     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1323   } else {
1324     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1325     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1326     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1327     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1328   }
1329   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1330 
1331   /* load up the local column values */
1332   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1333   cnt  = 0;
1334   for (i=0; i<mat->rmap->n; i++) {
1335     for (j=B->i[i]; j<B->i[i+1]; j++) {
1336       if (garray[B->j[j]] > cstart) break;
1337       column_values[cnt++] = B->a[j];
1338     }
1339     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1340     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1341   }
1342   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1343 
1344   /* store the column values to the file */
1345   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1346   if (!rank) {
1347     MPI_Status status;
1348     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1349     for (i=1; i<size; i++) {
1350       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1351       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1352       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1353       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1354       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1355     }
1356     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1357   } else {
1358     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1359     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1360     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1361     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1362   }
1363   ierr = PetscFree(column_values);CHKERRQ(ierr);
1364 
1365   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1366   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1367   PetscFunctionReturn(0);
1368 }
1369 
1370 #include <petscdraw.h>
1371 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1372 {
1373   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1374   PetscErrorCode    ierr;
1375   PetscMPIInt       rank = aij->rank,size = aij->size;
1376   PetscBool         isdraw,iascii,isbinary;
1377   PetscViewer       sviewer;
1378   PetscViewerFormat format;
1379 
1380   PetscFunctionBegin;
1381   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1382   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1383   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1384   if (iascii) {
1385     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1386     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1387       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1388       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1389       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1390       for (i=0; i<(PetscInt)size; i++) {
1391         nmax = PetscMax(nmax,nz[i]);
1392         nmin = PetscMin(nmin,nz[i]);
1393         navg += nz[i];
1394       }
1395       ierr = PetscFree(nz);CHKERRQ(ierr);
1396       navg = navg/size;
1397       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1398       PetscFunctionReturn(0);
1399     }
1400     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1401     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1402       MatInfo   info;
1403       PetscBool inodes;
1404 
1405       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1406       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1407       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1408       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1409       if (!inodes) {
1410         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1411                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1412       } else {
1413         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1414                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1415       }
1416       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1417       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1418       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1419       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1420       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1421       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1422       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1423       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1424       PetscFunctionReturn(0);
1425     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1426       PetscInt inodecount,inodelimit,*inodes;
1427       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1428       if (inodes) {
1429         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1430       } else {
1431         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1432       }
1433       PetscFunctionReturn(0);
1434     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1435       PetscFunctionReturn(0);
1436     }
1437   } else if (isbinary) {
1438     if (size == 1) {
1439       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1440       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1441     } else {
1442       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1443     }
1444     PetscFunctionReturn(0);
1445   } else if (isdraw) {
1446     PetscDraw draw;
1447     PetscBool isnull;
1448     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1449     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1450     if (isnull) PetscFunctionReturn(0);
1451   }
1452 
1453   {
1454     /* assemble the entire matrix onto first processor. */
1455     Mat        A;
1456     Mat_SeqAIJ *Aloc;
1457     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1458     MatScalar  *a;
1459 
1460     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1461     if (!rank) {
1462       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1463     } else {
1464       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1465     }
1466     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1467     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1468     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1469     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1470     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1471 
1472     /* copy over the A part */
1473     Aloc = (Mat_SeqAIJ*)aij->A->data;
1474     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1475     row  = mat->rmap->rstart;
1476     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1477     for (i=0; i<m; i++) {
1478       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1479       row++;
1480       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1481     }
1482     aj = Aloc->j;
1483     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1484 
1485     /* copy over the B part */
1486     Aloc = (Mat_SeqAIJ*)aij->B->data;
1487     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1488     row  = mat->rmap->rstart;
1489     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1490     ct   = cols;
1491     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1492     for (i=0; i<m; i++) {
1493       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1494       row++;
1495       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1496     }
1497     ierr = PetscFree(ct);CHKERRQ(ierr);
1498     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1499     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1500     /*
1501        Every process must participate in the calls below, since the graphics waits are
1502        synchronized across all processes that share the PetscDraw object
1503     */
1504     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1505     if (!rank) {
1506       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1507       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1508     }
1509     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1510     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1511     ierr = MatDestroy(&A);CHKERRQ(ierr);
1512   }
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1517 {
1518   PetscErrorCode ierr;
1519   PetscBool      iascii,isdraw,issocket,isbinary;
1520 
1521   PetscFunctionBegin;
1522   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1523   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1524   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1525   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1526   if (iascii || isdraw || isbinary || issocket) {
1527     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1528   }
1529   PetscFunctionReturn(0);
1530 }
1531 
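/*
   Only the "local" SOR variants (and Eisenstat) are supported in parallel: each outer iteration
   scatters the current solution into mat->lvec, moves the off-process coupling to the right-hand
   side (bb1 = bb - B*x), and then applies the sequential SOR kernel of the diagonal block mat->A.
   A true parallel SOR sweep is not implemented and is rejected with an error below.
*/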
1532 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1533 {
1534   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1535   PetscErrorCode ierr;
1536   Vec            bb1 = 0;
1537   PetscBool      hasop;
1538 
1539   PetscFunctionBegin;
1540   if (flag == SOR_APPLY_UPPER) {
1541     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1542     PetscFunctionReturn(0);
1543   }
1544 
1545   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1546     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1547   }
1548 
1549   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1550     if (flag & SOR_ZERO_INITIAL_GUESS) {
1551       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1552       its--;
1553     }
1554 
1555     while (its--) {
1556       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1557       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1558 
1559       /* update rhs: bb1 = bb - B*x */
1560       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1561       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1562 
1563       /* local sweep */
1564       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1565     }
1566   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1567     if (flag & SOR_ZERO_INITIAL_GUESS) {
1568       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1569       its--;
1570     }
1571     while (its--) {
1572       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1573       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1574 
1575       /* update rhs: bb1 = bb - B*x */
1576       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1577       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1578 
1579       /* local sweep */
1580       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1581     }
1582   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1583     if (flag & SOR_ZERO_INITIAL_GUESS) {
1584       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1585       its--;
1586     }
1587     while (its--) {
1588       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1589       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1590 
1591       /* update rhs: bb1 = bb - B*x */
1592       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1593       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1594 
1595       /* local sweep */
1596       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1597     }
1598   } else if (flag & SOR_EISENSTAT) {
1599     Vec xx1;
1600 
1601     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1602     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1603 
1604     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1605     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1606     if (!mat->diag) {
1607       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1608       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1609     }
1610     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1611     if (hasop) {
1612       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1613     } else {
1614       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1615     }
1616     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1617 
1618     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1619 
1620     /* local sweep */
1621     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1622     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1623     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1624   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1625 
1626   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1627 
1628   matin->factorerrortype = mat->A->factorerrortype;
1629   PetscFunctionReturn(0);
1630 }
1631 
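/*
   Permutes rows and columns of A in parallel. PetscSF graphs built from the permutation index sets
   are used to invert the row and column permutations (so each process learns the destination global
   index of its rows, columns, and ghost columns); the per-row diagonal/off-diagonal preallocation
   counts for the permuted matrix are exchanged through the same row PetscSF, and the values are
   then inserted into the new matrix with MatSetValues().
*/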
1632 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1633 {
1634   Mat            aA,aB,Aperm;
1635   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1636   PetscScalar    *aa,*ba;
1637   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1638   PetscSF        rowsf,sf;
1639   IS             parcolp = NULL;
1640   PetscBool      done;
1641   PetscErrorCode ierr;
1642 
1643   PetscFunctionBegin;
1644   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1645   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1646   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1647   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1648 
1649   /* Invert row permutation to find out where my rows should go */
1650   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1651   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1652   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1653   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1654   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1655   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1656 
1657   /* Invert column permutation to find out where my columns should go */
1658   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1659   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1660   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1661   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1662   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1663   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1664   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1665 
1666   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1667   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1668   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1669 
1670   /* Find out where my gcols should go */
1671   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1672   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1673   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1674   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1675   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1676   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1677   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1678   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1679 
1680   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1681   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1682   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1683   for (i=0; i<m; i++) {
1684     PetscInt row = rdest[i],rowner;
1685     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1686     for (j=ai[i]; j<ai[i+1]; j++) {
1687       PetscInt cowner,col = cdest[aj[j]];
1688       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1689       if (rowner == cowner) dnnz[i]++;
1690       else onnz[i]++;
1691     }
1692     for (j=bi[i]; j<bi[i+1]; j++) {
1693       PetscInt cowner,col = gcdest[bj[j]];
1694       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1695       if (rowner == cowner) dnnz[i]++;
1696       else onnz[i]++;
1697     }
1698   }
1699   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1700   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1701   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1702   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1703   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1704 
1705   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1706   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1707   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1708   for (i=0; i<m; i++) {
1709     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1710     PetscInt j0,rowlen;
1711     rowlen = ai[i+1] - ai[i];
1712     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m, the length of the repurposed acols/bcols work arrays, so insert the row in batches of at most m values */
1713       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1714       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1715     }
1716     rowlen = bi[i+1] - bi[i];
1717     for (j0=j=0; j<rowlen; j0=j) {
1718       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1719       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1720     }
1721   }
1722   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1723   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1724   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1725   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1726   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1727   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1728   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1729   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1730   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1731   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1732   *B = Aperm;
1733   PetscFunctionReturn(0);
1734 }
1735 
1736 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1737 {
1738   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1739   PetscErrorCode ierr;
1740 
1741   PetscFunctionBegin;
1742   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1743   if (ghosts) *ghosts = aij->garray;
1744   PetscFunctionReturn(0);
1745 }
1746 
1747 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1748 {
1749   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1750   Mat            A    = mat->A,B = mat->B;
1751   PetscErrorCode ierr;
1752   PetscReal      isend[5],irecv[5];
1753 
1754   PetscFunctionBegin;
1755   info->block_size = 1.0;
1756   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1757 
1758   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1759   isend[3] = info->memory;  isend[4] = info->mallocs;
1760 
1761   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1762 
1763   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1764   isend[3] += info->memory;  isend[4] += info->mallocs;
1765   if (flag == MAT_LOCAL) {
1766     info->nz_used      = isend[0];
1767     info->nz_allocated = isend[1];
1768     info->nz_unneeded  = isend[2];
1769     info->memory       = isend[3];
1770     info->mallocs      = isend[4];
1771   } else if (flag == MAT_GLOBAL_MAX) {
1772     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1773 
1774     info->nz_used      = irecv[0];
1775     info->nz_allocated = irecv[1];
1776     info->nz_unneeded  = irecv[2];
1777     info->memory       = irecv[3];
1778     info->mallocs      = irecv[4];
1779   } else if (flag == MAT_GLOBAL_SUM) {
1780     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1781 
1782     info->nz_used      = irecv[0];
1783     info->nz_allocated = irecv[1];
1784     info->nz_unneeded  = irecv[2];
1785     info->memory       = irecv[3];
1786     info->mallocs      = irecv[4];
1787   }
1788   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1789   info->fill_ratio_needed = 0;
1790   info->factor_mallocs    = 0;
1791   PetscFunctionReturn(0);
1792 }
1793 
1794 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1795 {
1796   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1797   PetscErrorCode ierr;
1798 
1799   PetscFunctionBegin;
1800   switch (op) {
1801   case MAT_NEW_NONZERO_LOCATIONS:
1802   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1803   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1804   case MAT_KEEP_NONZERO_PATTERN:
1805   case MAT_NEW_NONZERO_LOCATION_ERR:
1806   case MAT_USE_INODES:
1807   case MAT_IGNORE_ZERO_ENTRIES:
1808     MatCheckPreallocated(A,1);
1809     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1810     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1811     break;
1812   case MAT_ROW_ORIENTED:
1813     MatCheckPreallocated(A,1);
1814     a->roworiented = flg;
1815 
1816     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1817     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1818     break;
1819   case MAT_NEW_DIAGONALS:
1820     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1821     break;
1822   case MAT_IGNORE_OFF_PROC_ENTRIES:
1823     a->donotstash = flg;
1824     break;
1825   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1826   case MAT_SPD:
1827   case MAT_SYMMETRIC:
1828   case MAT_STRUCTURALLY_SYMMETRIC:
1829   case MAT_HERMITIAN:
1830   case MAT_SYMMETRY_ETERNAL:
1831     break;
1832   case MAT_SUBMAT_SINGLEIS:
1833     A->submat_singleis = flg;
1834     break;
1835   case MAT_STRUCTURE_ONLY:
1836     /* The option is handled directly by MatSetOption() */
1837     break;
1838   default:
1839     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1840   }
1841   PetscFunctionReturn(0);
1842 }
1843 
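/*
   Returns a single (locally owned) row of the matrix. The entries of the diagonal (A) and
   off-diagonal (B) blocks are merged into the work arrays mat->rowvalues/mat->rowindices in
   increasing global column order: B entries with columns left of the diagonal block, then the
   A entries, then the remaining B entries.
*/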
1844 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1845 {
1846   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1847   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1848   PetscErrorCode ierr;
1849   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1850   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1851   PetscInt       *cmap,*idx_p;
1852 
1853   PetscFunctionBegin;
1854   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1855   mat->getrowactive = PETSC_TRUE;
1856 
1857   if (!mat->rowvalues && (idx || v)) {
1858     /*
1859         allocate enough space to hold information from the longest row.
1860     */
1861     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1862     PetscInt   max = 1,tmp;
1863     for (i=0; i<matin->rmap->n; i++) {
1864       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1865       if (max < tmp) max = tmp;
1866     }
1867     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1868   }
1869 
1870   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1871   lrow = row - rstart;
1872 
1873   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1874   if (!v)   {pvA = 0; pvB = 0;}
1875   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1876   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1877   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1878   nztot = nzA + nzB;
1879 
1880   cmap = mat->garray;
1881   if (v  || idx) {
1882     if (nztot) {
1883       /* Sort by increasing column numbers, assuming A and B already sorted */
1884       PetscInt imark = -1;
1885       if (v) {
1886         *v = v_p = mat->rowvalues;
1887         for (i=0; i<nzB; i++) {
1888           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1889           else break;
1890         }
1891         imark = i;
1892         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1893         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1894       }
1895       if (idx) {
1896         *idx = idx_p = mat->rowindices;
1897         if (imark > -1) {
1898           for (i=0; i<imark; i++) {
1899             idx_p[i] = cmap[cworkB[i]];
1900           }
1901         } else {
1902           for (i=0; i<nzB; i++) {
1903             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1904             else break;
1905           }
1906           imark = i;
1907         }
1908         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1909         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1910       }
1911     } else {
1912       if (idx) *idx = 0;
1913       if (v)   *v   = 0;
1914     }
1915   }
1916   *nz  = nztot;
1917   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1918   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1923 {
1924   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1925 
1926   PetscFunctionBegin;
1927   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1928   aij->getrowactive = PETSC_FALSE;
1929   PetscFunctionReturn(0);
1930 }
1931 
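/*
   NORM_FROBENIUS: sum |a_ij|^2 over both local blocks, reduce the sum over the communicator, take the square root.
   NORM_1: accumulate the absolute column sums into an array of global column length, reduce, then take the maximum.
   NORM_INFINITY: take the maximum local row sum and reduce with MPIU_MAX. The 2-norm is not supported.
*/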
1932 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1933 {
1934   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1935   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1936   PetscErrorCode ierr;
1937   PetscInt       i,j,cstart = mat->cmap->rstart;
1938   PetscReal      sum = 0.0;
1939   MatScalar      *v;
1940 
1941   PetscFunctionBegin;
1942   if (aij->size == 1) {
1943     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1944   } else {
1945     if (type == NORM_FROBENIUS) {
1946       v = amat->a;
1947       for (i=0; i<amat->nz; i++) {
1948         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1949       }
1950       v = bmat->a;
1951       for (i=0; i<bmat->nz; i++) {
1952         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1953       }
1954       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1955       *norm = PetscSqrtReal(*norm);
1956       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1957     } else if (type == NORM_1) { /* max column norm */
1958       PetscReal *tmp,*tmp2;
1959       PetscInt  *jj,*garray = aij->garray;
1960       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1961       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1962       *norm = 0.0;
1963       v     = amat->a; jj = amat->j;
1964       for (j=0; j<amat->nz; j++) {
1965         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1966       }
1967       v = bmat->a; jj = bmat->j;
1968       for (j=0; j<bmat->nz; j++) {
1969         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1970       }
1971       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1972       for (j=0; j<mat->cmap->N; j++) {
1973         if (tmp2[j] > *norm) *norm = tmp2[j];
1974       }
1975       ierr = PetscFree(tmp);CHKERRQ(ierr);
1976       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1977       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1978     } else if (type == NORM_INFINITY) { /* max row norm */
1979       PetscReal ntemp = 0.0;
1980       for (j=0; j<aij->A->rmap->n; j++) {
1981         v   = amat->a + amat->i[j];
1982         sum = 0.0;
1983         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1984           sum += PetscAbsScalar(*v); v++;
1985         }
1986         v = bmat->a + bmat->i[j];
1987         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1988           sum += PetscAbsScalar(*v); v++;
1989         }
1990         if (sum > ntemp) ntemp = sum;
1991       }
1992       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1993       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1994     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1995   }
1996   PetscFunctionReturn(0);
1997 }
1998 
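/*
   Preallocation for the transpose is obtained by counting, per column, the nonzeros of the local
   diagonal block (d_nnz) and by reducing the off-diagonal column counts to the owning processes
   with a PetscSF (o_nnz). The diagonal block is then transposed with MatTranspose() (all writes are
   local), while the off-diagonal block is inserted with MatSetValues(), using its rows as columns.
*/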
1999 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2000 {
2001   Mat_MPIAIJ     *a    =(Mat_MPIAIJ*)A->data,*b;
2002   Mat_SeqAIJ     *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2003   PetscInt       M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol;
2004   PetscErrorCode ierr;
2005   Mat            B,A_diag,*B_diag;
2006   MatScalar      *array;
2007 
2008   PetscFunctionBegin;
2009   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2010   ai = Aloc->i; aj = Aloc->j;
2011   bi = Bloc->i; bj = Bloc->j;
2012   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2013     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2014     PetscSFNode          *oloc;
2015     PETSC_UNUSED PetscSF sf;
2016 
2017     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2018     /* compute d_nnz for preallocation */
2019     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2020     for (i=0; i<ai[ma]; i++) {
2021       d_nnz[aj[i]]++;
2022     }
2023     /* compute local off-diagonal contributions */
2024     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2025     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2026     /* map those to global */
2027     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2028     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2029     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2030     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2031     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2032     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2033     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2034 
2035     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2036     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2037     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2038     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2039     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2040     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2041   } else {
2042     B    = *matout;
2043     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2044   }
2045 
2046   b           = (Mat_MPIAIJ*)B->data;
2047   A_diag      = a->A;
2048   B_diag      = &b->A;
2049   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2050   A_diag_ncol = A_diag->cmap->N;
2051   B_diag_ilen = sub_B_diag->ilen;
2052   B_diag_i    = sub_B_diag->i;
2053 
2054   /* Set ilen for diagonal of B */
2055   for (i=0; i<A_diag_ncol; i++) {
2056     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2057   }
2058 
2059   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2060      very quickly (i.e. without using MatSetValues), because all writes are local. */
2061   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2062 
2063   /* copy over the B part */
2064   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2065   array = Bloc->a;
2066   row   = A->rmap->rstart;
2067   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2068   cols_tmp = cols;
2069   for (i=0; i<mb; i++) {
2070     ncol = bi[i+1]-bi[i];
2071     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2072     row++;
2073     array += ncol; cols_tmp += ncol;
2074   }
2075   ierr = PetscFree(cols);CHKERRQ(ierr);
2076 
2077   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2078   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2079   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2080     *matout = B;
2081   } else {
2082     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2083   }
2084   PetscFunctionReturn(0);
2085 }
2086 
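/*
   Left scaling (ll) is local to each block row and is applied directly to both blocks. Right
   scaling (rr) of the off-diagonal block needs the ghost values of rr, so the scatter into
   aij->lvec is started first, the diagonal block is scaled while the communication proceeds,
   and the off-diagonal block is scaled once the scatter has completed.
*/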
2087 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2088 {
2089   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2090   Mat            a    = aij->A,b = aij->B;
2091   PetscErrorCode ierr;
2092   PetscInt       s1,s2,s3;
2093 
2094   PetscFunctionBegin;
2095   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2096   if (rr) {
2097     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2098     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2099     /* Overlap communication with computation. */
2100     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2101   }
2102   if (ll) {
2103     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2104     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2105     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2106   }
2107   /* scale  the diagonal block */
2108   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2109 
2110   if (rr) {
2111     /* Do a scatter end and then right scale the off-diagonal block */
2112     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2113     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2114   }
2115   PetscFunctionReturn(0);
2116 }
2117 
2118 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2119 {
2120   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2121   PetscErrorCode ierr;
2122 
2123   PetscFunctionBegin;
2124   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2125   PetscFunctionReturn(0);
2126 }
2127 
2128 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2129 {
2130   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2131   Mat            a,b,c,d;
2132   PetscBool      flg;
2133   PetscErrorCode ierr;
2134 
2135   PetscFunctionBegin;
2136   a = matA->A; b = matA->B;
2137   c = matB->A; d = matB->B;
2138 
2139   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2140   if (flg) {
2141     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2142   }
2143   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2148 {
2149   PetscErrorCode ierr;
2150   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2151   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2152 
2153   PetscFunctionBegin;
2154   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2155   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2156     /* Because of the column compression in the off-processor part of the matrix a->B,
2157        the number of columns in a->B and b->B may differ, so MatCopy() cannot be called
2158        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
2159        could be provided by first uncompressing the a->B matrices and then copying the
2160        submatrices */
2161     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2162   } else {
2163     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2164     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2165   }
2166   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2171 {
2172   PetscErrorCode ierr;
2173 
2174   PetscFunctionBegin;
2175   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 /*
2180    Computes the number of nonzeros per row needed for preallocation when X and Y
2181    have different nonzero structure.
2182 */
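/*
   The routine below merges, row by row, the two sorted column lists of X and Y (compared in global
   numbering through xltog/yltog) and counts the size of their union. For example, if a row of X has
   global columns {1,4,7} and the same row of Y has {4,5}, the resulting nnz for that row is 4.
*/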
2183 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2184 {
2185   PetscInt       i,j,k,nzx,nzy;
2186 
2187   PetscFunctionBegin;
2188   /* Set the number of nonzeros in the new matrix */
2189   for (i=0; i<m; i++) {
2190     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2191     nzx = xi[i+1] - xi[i];
2192     nzy = yi[i+1] - yi[i];
2193     nnz[i] = 0;
2194     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2195       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2196       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2197       nnz[i]++;
2198     }
2199     for (; k<nzy; k++) nnz[i]++;
2200   }
2201   PetscFunctionReturn(0);
2202 }
2203 
2204 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2205 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2206 {
2207   PetscErrorCode ierr;
2208   PetscInt       m = Y->rmap->N;
2209   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2210   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2211 
2212   PetscFunctionBegin;
2213   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2214   PetscFunctionReturn(0);
2215 }
2216 
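/*
   Y = Y + a*X. With SAME_NONZERO_PATTERN the sum is done directly on the stored value arrays of the
   diagonal and off-diagonal blocks with BLAS axpy. With SUBSET_NONZERO_PATTERN, MatAXPY_Basic() is
   used. Otherwise a new matrix with the union nonzero pattern is preallocated (see the helpers
   above), filled with MatAXPY_BasicWithPreallocation(), and swapped into Y with MatHeaderReplace().
*/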
2217 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2218 {
2219   PetscErrorCode ierr;
2220   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2221   PetscBLASInt   bnz,one=1;
2222   Mat_SeqAIJ     *x,*y;
2223 
2224   PetscFunctionBegin;
2225   if (str == SAME_NONZERO_PATTERN) {
2226     PetscScalar alpha = a;
2227     x    = (Mat_SeqAIJ*)xx->A->data;
2228     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2229     y    = (Mat_SeqAIJ*)yy->A->data;
2230     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2231     x    = (Mat_SeqAIJ*)xx->B->data;
2232     y    = (Mat_SeqAIJ*)yy->B->data;
2233     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2234     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2235     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2236   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2237     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2238   } else {
2239     Mat      B;
2240     PetscInt *nnz_d,*nnz_o;
2241     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2242     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2243     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2244     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2245     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2246     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2247     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2248     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2249     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2250     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2251     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2252     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2253     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2254     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2255   }
2256   PetscFunctionReturn(0);
2257 }
2258 
2259 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2260 
2261 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2262 {
2263 #if defined(PETSC_USE_COMPLEX)
2264   PetscErrorCode ierr;
2265   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2266 
2267   PetscFunctionBegin;
2268   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2269   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2270 #else
2271   PetscFunctionBegin;
2272 #endif
2273   PetscFunctionReturn(0);
2274 }
2275 
2276 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2277 {
2278   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2279   PetscErrorCode ierr;
2280 
2281   PetscFunctionBegin;
2282   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2283   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2284   PetscFunctionReturn(0);
2285 }
2286 
2287 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2288 {
2289   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2290   PetscErrorCode ierr;
2291 
2292   PetscFunctionBegin;
2293   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2294   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2295   PetscFunctionReturn(0);
2296 }
2297 
2298 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2299 {
2300   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2301   PetscErrorCode ierr;
2302   PetscInt       i,*idxb = 0;
2303   PetscScalar    *va,*vb;
2304   Vec            vtmp;
2305 
2306   PetscFunctionBegin;
2307   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2308   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2309   if (idx) {
2310     for (i=0; i<A->rmap->n; i++) {
2311       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2312     }
2313   }
2314 
2315   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2316   if (idx) {
2317     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2318   }
2319   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2320   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2321 
2322   for (i=0; i<A->rmap->n; i++) {
2323     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2324       va[i] = vb[i];
2325       if (idx) idx[i] = a->garray[idxb[i]];
2326     }
2327   }
2328 
2329   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2330   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2331   ierr = PetscFree(idxb);CHKERRQ(ierr);
2332   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2333   PetscFunctionReturn(0);
2334 }
2335 
2336 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2337 {
2338   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2339   PetscErrorCode ierr;
2340   PetscInt       i,*idxb = 0;
2341   PetscScalar    *va,*vb;
2342   Vec            vtmp;
2343 
2344   PetscFunctionBegin;
2345   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2346   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2347   if (idx) {
2348     for (i=0; i<A->rmap->n; i++) { /* loop over local rows, matching the local size of v and idx */
2349       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2350     }
2351   }
2352 
2353   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2354   if (idx) {
2355     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2356   }
2357   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2358   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2359 
2360   for (i=0; i<A->rmap->n; i++) {
2361     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2362       va[i] = vb[i];
2363       if (idx) idx[i] = a->garray[idxb[i]];
2364     }
2365   }
2366 
2367   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2368   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2369   ierr = PetscFree(idxb);CHKERRQ(ierr);
2370   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2371   PetscFunctionReturn(0);
2372 }
2373 
2374 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2375 {
2376   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2377   PetscInt       n      = A->rmap->n;
2378   PetscInt       cstart = A->cmap->rstart;
2379   PetscInt       *cmap  = mat->garray;
2380   PetscInt       *diagIdx, *offdiagIdx;
2381   Vec            diagV, offdiagV;
2382   PetscScalar    *a, *diagA, *offdiagA;
2383   PetscInt       r;
2384   PetscErrorCode ierr;
2385 
2386   PetscFunctionBegin;
2387   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2388   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2389   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2390   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2391   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2392   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2393   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2394   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2395   for (r = 0; r < n; ++r) {
2396     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2397       a[r]   = diagA[r];
2398       idx[r] = cstart + diagIdx[r];
2399     } else {
2400       a[r]   = offdiagA[r];
2401       idx[r] = cmap[offdiagIdx[r]];
2402     }
2403   }
2404   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2405   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2406   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2407   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2408   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2409   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2410   PetscFunctionReturn(0);
2411 }
2412 
2413 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2414 {
2415   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2416   PetscInt       n      = A->rmap->n;
2417   PetscInt       cstart = A->cmap->rstart;
2418   PetscInt       *cmap  = mat->garray;
2419   PetscInt       *diagIdx, *offdiagIdx;
2420   Vec            diagV, offdiagV;
2421   PetscScalar    *a, *diagA, *offdiagA;
2422   PetscInt       r;
2423   PetscErrorCode ierr;
2424 
2425   PetscFunctionBegin;
2426   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2427   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2428   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2429   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2430   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2431   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2432   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2433   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2434   for (r = 0; r < n; ++r) {
2435     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2436       a[r]   = diagA[r];
2437       idx[r] = cstart + diagIdx[r];
2438     } else {
2439       a[r]   = offdiagA[r];
2440       idx[r] = cmap[offdiagIdx[r]];
2441     }
2442   }
2443   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2444   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2445   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2446   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2447   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2448   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2449   PetscFunctionReturn(0);
2450 }
2451 
2452 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2453 {
2454   PetscErrorCode ierr;
2455   Mat            *dummy;
2456 
2457   PetscFunctionBegin;
2458   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2459   *newmat = *dummy;
2460   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2465 {
2466   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2467   PetscErrorCode ierr;
2468 
2469   PetscFunctionBegin;
2470   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2471   A->factorerrortype = a->A->factorerrortype;
2472   PetscFunctionReturn(0);
2473 }
2474 
2475 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2476 {
2477   PetscErrorCode ierr;
2478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2479 
2480   PetscFunctionBegin;
2481   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2482   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2483   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2484   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2485   PetscFunctionReturn(0);
2486 }
2487 
2488 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2489 {
2490   PetscFunctionBegin;
2491   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2492   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2493   PetscFunctionReturn(0);
2494 }
2495 
2496 /*@
2497    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2498 
2499    Collective on Mat
2500 
2501    Input Parameters:
2502 +    A - the matrix
2503 -    sc - PETSC_TRUE indicates that the scalable algorithm should be used (the default is not to use it)
2504 
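   Options Database Keys:
. -mat_increase_overlap_scalable - use the scalable algorithm to compute the overlap (read in MatSetFromOptions_MPIAIJ())
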
2505  Level: advanced
2506 
2507 @*/
2508 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2509 {
2510   PetscErrorCode       ierr;
2511 
2512   PetscFunctionBegin;
2513   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2514   PetscFunctionReturn(0);
2515 }
2516 
2517 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2518 {
2519   PetscErrorCode       ierr;
2520   PetscBool            sc = PETSC_FALSE,flg;
2521 
2522   PetscFunctionBegin;
2523   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2524   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2525   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2526   if (flg) {
2527     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2528   }
2529   ierr = PetscOptionsTail();CHKERRQ(ierr);
2530   PetscFunctionReturn(0);
2531 }
2532 
2533 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2534 {
2535   PetscErrorCode ierr;
2536   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2537   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2538 
2539   PetscFunctionBegin;
2540   if (!Y->preallocated) {
2541     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2542   } else if (!aij->nz) {
2543     PetscInt nonew = aij->nonew;
2544     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2545     aij->nonew = nonew;
2546   }
2547   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2548   PetscFunctionReturn(0);
2549 }
2550 
2551 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2552 {
2553   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2554   PetscErrorCode ierr;
2555 
2556   PetscFunctionBegin;
2557   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2558   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2559   if (d) {
2560     PetscInt rstart;
2561     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2562     *d += rstart;
2563 
2564   }
2565   PetscFunctionReturn(0);
2566 }
2567 
2568 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2569 {
2570   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2571   PetscErrorCode ierr;
2572 
2573   PetscFunctionBegin;
2574   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2575   PetscFunctionReturn(0);
2576 }
2577 
2578 /* -------------------------------------------------------------------*/
2579 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2580                                        MatGetRow_MPIAIJ,
2581                                        MatRestoreRow_MPIAIJ,
2582                                        MatMult_MPIAIJ,
2583                                 /* 4*/ MatMultAdd_MPIAIJ,
2584                                        MatMultTranspose_MPIAIJ,
2585                                        MatMultTransposeAdd_MPIAIJ,
2586                                        0,
2587                                        0,
2588                                        0,
2589                                 /*10*/ 0,
2590                                        0,
2591                                        0,
2592                                        MatSOR_MPIAIJ,
2593                                        MatTranspose_MPIAIJ,
2594                                 /*15*/ MatGetInfo_MPIAIJ,
2595                                        MatEqual_MPIAIJ,
2596                                        MatGetDiagonal_MPIAIJ,
2597                                        MatDiagonalScale_MPIAIJ,
2598                                        MatNorm_MPIAIJ,
2599                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2600                                        MatAssemblyEnd_MPIAIJ,
2601                                        MatSetOption_MPIAIJ,
2602                                        MatZeroEntries_MPIAIJ,
2603                                 /*24*/ MatZeroRows_MPIAIJ,
2604                                        0,
2605                                        0,
2606                                        0,
2607                                        0,
2608                                 /*29*/ MatSetUp_MPIAIJ,
2609                                        0,
2610                                        0,
2611                                        MatGetDiagonalBlock_MPIAIJ,
2612                                        0,
2613                                 /*34*/ MatDuplicate_MPIAIJ,
2614                                        0,
2615                                        0,
2616                                        0,
2617                                        0,
2618                                 /*39*/ MatAXPY_MPIAIJ,
2619                                        MatCreateSubMatrices_MPIAIJ,
2620                                        MatIncreaseOverlap_MPIAIJ,
2621                                        MatGetValues_MPIAIJ,
2622                                        MatCopy_MPIAIJ,
2623                                 /*44*/ MatGetRowMax_MPIAIJ,
2624                                        MatScale_MPIAIJ,
2625                                        MatShift_MPIAIJ,
2626                                        MatDiagonalSet_MPIAIJ,
2627                                        MatZeroRowsColumns_MPIAIJ,
2628                                 /*49*/ MatSetRandom_MPIAIJ,
2629                                        0,
2630                                        0,
2631                                        0,
2632                                        0,
2633                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2634                                        0,
2635                                        MatSetUnfactored_MPIAIJ,
2636                                        MatPermute_MPIAIJ,
2637                                        0,
2638                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2639                                        MatDestroy_MPIAIJ,
2640                                        MatView_MPIAIJ,
2641                                        0,
2642                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2643                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2644                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2645                                        0,
2646                                        0,
2647                                        0,
2648                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2649                                        MatGetRowMinAbs_MPIAIJ,
2650                                        0,
2651                                        0,
2652                                        0,
2653                                        0,
2654                                 /*75*/ MatFDColoringApply_AIJ,
2655                                        MatSetFromOptions_MPIAIJ,
2656                                        0,
2657                                        0,
2658                                        MatFindZeroDiagonals_MPIAIJ,
2659                                 /*80*/ 0,
2660                                        0,
2661                                        0,
2662                                 /*83*/ MatLoad_MPIAIJ,
2663                                        MatIsSymmetric_MPIAIJ,
2664                                        0,
2665                                        0,
2666                                        0,
2667                                        0,
2668                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2669                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2670                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2671                                        MatPtAP_MPIAIJ_MPIAIJ,
2672                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2673                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2674                                        0,
2675                                        0,
2676                                        0,
2677                                        0,
2678                                 /*99*/ 0,
2679                                        0,
2680                                        0,
2681                                        MatConjugate_MPIAIJ,
2682                                        0,
2683                                 /*104*/MatSetValuesRow_MPIAIJ,
2684                                        MatRealPart_MPIAIJ,
2685                                        MatImaginaryPart_MPIAIJ,
2686                                        0,
2687                                        0,
2688                                 /*109*/0,
2689                                        0,
2690                                        MatGetRowMin_MPIAIJ,
2691                                        0,
2692                                        MatMissingDiagonal_MPIAIJ,
2693                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2694                                        0,
2695                                        MatGetGhosts_MPIAIJ,
2696                                        0,
2697                                        0,
2698                                 /*119*/0,
2699                                        0,
2700                                        0,
2701                                        0,
2702                                        MatGetMultiProcBlock_MPIAIJ,
2703                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2704                                        MatGetColumnNorms_MPIAIJ,
2705                                        MatInvertBlockDiagonal_MPIAIJ,
2706                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2707                                        MatCreateSubMatricesMPI_MPIAIJ,
2708                                 /*129*/0,
2709                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2710                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2711                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2712                                        0,
2713                                 /*134*/0,
2714                                        0,
2715                                        MatRARt_MPIAIJ_MPIAIJ,
2716                                        0,
2717                                        0,
2718                                 /*139*/MatSetBlockSizes_MPIAIJ,
2719                                        0,
2720                                        0,
2721                                        MatFDColoringSetUp_MPIXAIJ,
2722                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2723                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2724 };
2725 
2726 /* ----------------------------------------------------------------------------------------*/
2727 
2728 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2729 {
2730   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2731   PetscErrorCode ierr;
2732 
2733   PetscFunctionBegin;
2734   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2735   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2736   PetscFunctionReturn(0);
2737 }
2738 
2739 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2740 {
2741   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2742   PetscErrorCode ierr;
2743 
2744   PetscFunctionBegin;
2745   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2746   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2747   PetscFunctionReturn(0);
2748 }
2749 
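/*
   A hedged sketch of the store/retrieve pattern these two wrappers support (A is an
   assumed, already assembled MATMPIAIJ matrix; MatStoreValues() requires that new
   nonzero locations be disallowed first):

     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(A);CHKERRQ(ierr);     save the current numerical values
     ...  modify the values of A, e.g. while building a preconditioner  ...
     ierr = MatRetrieveValues(A);CHKERRQ(ierr);  restore the saved values
*/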
2750 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2751 {
2752   Mat_MPIAIJ     *b;
2753   PetscErrorCode ierr;
2754 
2755   PetscFunctionBegin;
2756   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2757   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2758   b = (Mat_MPIAIJ*)B->data;
2759 
2760 #if defined(PETSC_USE_CTABLE)
2761   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2762 #else
2763   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2764 #endif
2765   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2766   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2767   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2768 
2769   /* Because B may have been resized we simply destroy it and create a new one each time */
2770   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2771   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2772   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2773   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2774   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2775   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2776 
2777   if (!B->preallocated) {
2778     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2779     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2780     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2781     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2782     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2783   }
2784 
2785   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2786   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2787   B->preallocated  = PETSC_TRUE;
2788   B->was_assembled = PETSC_FALSE;
2789   B->assembled     = PETSC_FALSE;
2790   PetscFunctionReturn(0);
2791 }
2792 
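/*
   A hedged usage sketch of the preallocation interface implemented above (the matrix
   A and the global sizes M, N are placeholders): d_nz/d_nnz describe the "diagonal"
   block stored in b->A and o_nz/o_nnz the "off-diagonal" block stored in b->B; a
   non-NULL array overrides the corresponding scalar.

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/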
2793 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2794 {
2795   Mat_MPIAIJ     *b;
2796   PetscErrorCode ierr;
2797 
2798   PetscFunctionBegin;
2799   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2800   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2801   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2802   b = (Mat_MPIAIJ*)B->data;
2803 
2804 #if defined(PETSC_USE_CTABLE)
2805   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2806 #else
2807   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2808 #endif
2809   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2810   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2811   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2812 
2813   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2814   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2815   B->preallocated  = PETSC_TRUE;
2816   B->was_assembled = PETSC_FALSE;
2817   B->assembled = PETSC_FALSE;
2818   PetscFunctionReturn(0);
2819 }
2820 
2821 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2822 {
2823   Mat            mat;
2824   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2825   PetscErrorCode ierr;
2826 
2827   PetscFunctionBegin;
2828   *newmat = 0;
2829   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2830   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2831   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2832   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2833   a       = (Mat_MPIAIJ*)mat->data;
2834 
2835   mat->factortype   = matin->factortype;
2836   mat->assembled    = PETSC_TRUE;
2837   mat->insertmode   = NOT_SET_VALUES;
2838   mat->preallocated = PETSC_TRUE;
2839 
2840   a->size         = oldmat->size;
2841   a->rank         = oldmat->rank;
2842   a->donotstash   = oldmat->donotstash;
2843   a->roworiented  = oldmat->roworiented;
2844   a->rowindices   = 0;
2845   a->rowvalues    = 0;
2846   a->getrowactive = PETSC_FALSE;
2847 
2848   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2849   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2850 
2851   if (oldmat->colmap) {
2852 #if defined(PETSC_USE_CTABLE)
2853     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2854 #else
2855     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2856     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2857     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2858 #endif
2859   } else a->colmap = 0;
2860   if (oldmat->garray) {
2861     PetscInt len;
2862     len  = oldmat->B->cmap->n;
2863     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2864     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2865     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2866   } else a->garray = 0;
2867 
2868   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2869   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2870   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2871   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2872 
2873   if (oldmat->Mvctx_mpi1) {
2874     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2875     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2876   }
2877 
2878   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2879   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2880   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2881   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2882   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2883   *newmat = mat;
2884   PetscFunctionReturn(0);
2885 }
2886 
2887 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2888 {
2889   PetscBool      isbinary, ishdf5;
2890   PetscErrorCode ierr;
2891 
2892   PetscFunctionBegin;
2893   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2894   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2895   /* force binary viewer to load .info file if it has not yet done so */
2896   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2897   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2898   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2899   if (isbinary) {
2900     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2901   } else if (ishdf5) {
2902 #if defined(PETSC_HAVE_HDF5)
2903     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2904 #else
2905     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2906 #endif
2907   } else {
2908     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2909   }
2910   PetscFunctionReturn(0);
2911 }
2912 
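/*
   A hedged usage sketch for the loader dispatched above (the file name "matrix.dat"
   and the variables A and viewer are placeholders; the file must have been written
   previously with MatView() on a binary viewer):

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/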
2913 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
2914 {
2915   PetscScalar    *vals,*svals;
2916   MPI_Comm       comm;
2917   PetscErrorCode ierr;
2918   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2919   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2920   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2921   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2922   PetscInt       cend,cstart,n,*rowners;
2923   int            fd;
2924   PetscInt       bs = newMat->rmap->bs;
2925 
2926   PetscFunctionBegin;
2927   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2928   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2929   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2930   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2931   if (!rank) {
2932     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2933     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2934     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2935   }
2936 
2937   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2938   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2939   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2940   if (bs < 0) bs = 1;
2941 
2942   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2943   M    = header[1]; N = header[2];
2944 
2945   /* If global sizes are set, check if they are consistent with that given in the file */
2946   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2947   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2948 
2949   /* determine ownership of all (block) rows */
2950   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2951   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2952   else m = newMat->rmap->n; /* Set by user */
2953 
2954   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2955   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2956 
2957   /* First process needs enough room for process with most rows */
2958   if (!rank) {
2959     mmax = rowners[1];
2960     for (i=2; i<=size; i++) {
2961       mmax = PetscMax(mmax, rowners[i]);
2962     }
2963   } else mmax = -1;             /* unused, but compilers complain */
2964 
2965   rowners[0] = 0;
2966   for (i=2; i<=size; i++) {
2967     rowners[i] += rowners[i-1];
2968   }
2969   rstart = rowners[rank];
2970   rend   = rowners[rank+1];
2971 
2972   /* distribute row lengths to all processors */
2973   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2974   if (!rank) {
2975     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2976     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2977     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2978     for (j=0; j<m; j++) {
2979       procsnz[0] += ourlens[j];
2980     }
2981     for (i=1; i<size; i++) {
2982       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2983       /* calculate the number of nonzeros on each processor */
2984       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2985         procsnz[i] += rowlengths[j];
2986       }
2987       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2988     }
2989     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2990   } else {
2991     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2992   }
2993 
2994   if (!rank) {
2995     /* determine max buffer needed and allocate it */
2996     maxnz = 0;
2997     for (i=0; i<size; i++) {
2998       maxnz = PetscMax(maxnz,procsnz[i]);
2999     }
3000     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3001 
3002     /* read in my part of the matrix column indices  */
3003     nz   = procsnz[0];
3004     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3005     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3006 
3007     /* read in everyone else's and ship off */
3008     for (i=1; i<size; i++) {
3009       nz   = procsnz[i];
3010       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3011       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3012     }
3013     ierr = PetscFree(cols);CHKERRQ(ierr);
3014   } else {
3015     /* determine buffer space needed for message */
3016     nz = 0;
3017     for (i=0; i<m; i++) {
3018       nz += ourlens[i];
3019     }
3020     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3021 
3022     /* receive message of column indices */
3023     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3024   }
3025 
3026   /* determine column ownership if matrix is not square */
3027   if (N != M) {
3028     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3029     else n = newMat->cmap->n;
3030     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3031     cstart = cend - n;
3032   } else {
3033     cstart = rstart;
3034     cend   = rend;
3035     n      = cend - cstart;
3036   }
3037 
3038   /* loop over local rows, determining number of off diagonal entries */
3039   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3040   jj   = 0;
3041   for (i=0; i<m; i++) {
3042     for (j=0; j<ourlens[i]; j++) {
3043       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3044       jj++;
3045     }
3046   }
3047 
3048   for (i=0; i<m; i++) {
3049     ourlens[i] -= offlens[i];
3050   }
3051   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3052 
3053   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3054 
3055   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3056 
3057   for (i=0; i<m; i++) {
3058     ourlens[i] += offlens[i];
3059   }
3060 
3061   if (!rank) {
3062     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3063 
3064     /* read in my part of the matrix numerical values  */
3065     nz   = procsnz[0];
3066     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3067 
3068     /* insert into matrix */
3069     jj      = rstart;
3070     smycols = mycols;
3071     svals   = vals;
3072     for (i=0; i<m; i++) {
3073       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3074       smycols += ourlens[i];
3075       svals   += ourlens[i];
3076       jj++;
3077     }
3078 
3079     /* read in other processors and ship out */
3080     for (i=1; i<size; i++) {
3081       nz   = procsnz[i];
3082       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3083       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3084     }
3085     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3086   } else {
3087     /* receive numeric values */
3088     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3089 
3090     /* receive message of values */
3091     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3092 
3093     /* insert into matrix */
3094     jj      = rstart;
3095     smycols = mycols;
3096     svals   = vals;
3097     for (i=0; i<m; i++) {
3098       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3099       smycols += ourlens[i];
3100       svals   += ourlens[i];
3101       jj++;
3102     }
3103   }
3104   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3105   ierr = PetscFree(vals);CHKERRQ(ierr);
3106   ierr = PetscFree(mycols);CHKERRQ(ierr);
3107   ierr = PetscFree(rowners);CHKERRQ(ierr);
3108   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3109   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3110   PetscFunctionReturn(0);
3111 }
3112 
3113 /* Not scalable because of ISAllGather() unless getting all columns. */
3114 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3115 {
3116   PetscErrorCode ierr;
3117   IS             iscol_local;
3118   PetscBool      isstride;
3119   PetscMPIInt    lisstride=0,gisstride;
3120 
3121   PetscFunctionBegin;
3122   /* check if we are grabbing all columns */
3123   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3124 
3125   if (isstride) {
3126     PetscInt  start,len,mstart,mlen;
3127     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3128     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3129     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3130     if (mstart == start && mlen-mstart == len) lisstride = 1;
3131   }
3132 
3133   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3134   if (gisstride) {
3135     PetscInt N;
3136     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3137     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3138     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3139     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3140   } else {
3141     PetscInt cbs;
3142     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3143     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3144     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3145   }
3146 
3147   *isseq = iscol_local;
3148   PetscFunctionReturn(0);
3149 }
3150 
3151 /*
3152  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3153  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3154 
3155  Input Parameters:
3156    mat - matrix
3157    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3158            i.e., mat->rstart <= isrow[i] < mat->rend
3159    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3160            i.e., mat->cstart <= iscol[i] < mat->cend
3161  Output Parameters:
3162    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3163    iscol_o - sequential column index set for retrieving mat->B
3164    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3165  */
3166 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3167 {
3168   PetscErrorCode ierr;
3169   Vec            x,cmap;
3170   const PetscInt *is_idx;
3171   PetscScalar    *xarray,*cmaparray;
3172   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3173   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3174   Mat            B=a->B;
3175   Vec            lvec=a->lvec,lcmap;
3176   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3177   MPI_Comm       comm;
3178   VecScatter     Mvctx=a->Mvctx;
3179 
3180   PetscFunctionBegin;
3181   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3182   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3183 
3184   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3185   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3186   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3187   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3188   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3189 
3190   /* Get start indices */
3191   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3192   isstart -= ncols;
3193   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3194 
3195   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3196   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3197   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3198   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3199   for (i=0; i<ncols; i++) {
3200     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3201     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3202     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3203   }
3204   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3205   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3206   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3207 
3208   /* Get iscol_d */
3209   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3210   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3211   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3212 
3213   /* Get isrow_d */
3214   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3215   rstart = mat->rmap->rstart;
3216   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3217   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3218   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3219   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3220 
3221   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3222   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3223   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3224 
3225   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3226   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3227   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3228 
3229   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3230 
3231   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3232   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3233 
3234   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3235   /* off-process column indices */
3236   count = 0;
3237   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3238   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3239 
3240   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3241   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3242   for (i=0; i<Bn; i++) {
3243     if (PetscRealPart(xarray[i]) > -1.0) {
3244       idx[count]     = i;                   /* local column index in off-diagonal part B */
3245       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3246       count++;
3247     }
3248   }
3249   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3250   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3251 
3252   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3253   /* cannot ensure iscol_o has same blocksize as iscol! */
3254 
3255   ierr = PetscFree(idx);CHKERRQ(ierr);
3256   *garray = cmap1;
3257 
3258   ierr = VecDestroy(&x);CHKERRQ(ierr);
3259   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3260   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3261   PetscFunctionReturn(0);
3262 }
3263 
3264 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3265 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3266 {
3267   PetscErrorCode ierr;
3268   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3269   Mat            M = NULL;
3270   MPI_Comm       comm;
3271   IS             iscol_d,isrow_d,iscol_o;
3272   Mat            Asub = NULL,Bsub = NULL;
3273   PetscInt       n;
3274 
3275   PetscFunctionBegin;
3276   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3277 
3278   if (call == MAT_REUSE_MATRIX) {
3279     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3280     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3281     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3282 
3283     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3284     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3285 
3286     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3287     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3288 
3289     /* Update diagonal and off-diagonal portions of submat */
3290     asub = (Mat_MPIAIJ*)(*submat)->data;
3291     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3292     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3293     if (n) {
3294       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3295     }
3296     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3297     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3298 
3299   } else { /* call == MAT_INITIAL_MATRIX */
3300     const PetscInt *garray;
3301     PetscInt        BsubN;
3302 
3303     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3304     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3305 
3306     /* Create local submatrices Asub and Bsub */
3307     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3308     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3309 
3310     /* Create submatrix M */
3311     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3312 
3313     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3314     asub = (Mat_MPIAIJ*)M->data;
3315 
3316     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3317     n = asub->B->cmap->N;
3318     if (BsubN > n) {
3319       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3320       const PetscInt *idx;
3321       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3322       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3323 
3324       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3325       j = 0;
3326       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3327       for (i=0; i<n; i++) {
3328         if (j >= BsubN) break;
3329         while (subgarray[i] > garray[j]) j++;
3330 
3331         if (subgarray[i] == garray[j]) {
3332           idx_new[i] = idx[j++];
3333         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3334       }
3335       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3336 
3337       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3338       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3339 
3340     } else if (BsubN < n) {
3341       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than B's (%D)",BsubN,asub->B->cmap->N);
3342     }
3343 
3344     ierr = PetscFree(garray);CHKERRQ(ierr);
3345     *submat = M;
3346 
3347     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3348     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3349     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3350 
3351     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3352     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3353 
3354     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3355     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3356   }
3357   PetscFunctionReturn(0);
3358 }
3359 
3360 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3361 {
3362   PetscErrorCode ierr;
3363   IS             iscol_local=NULL,isrow_d;
3364   PetscInt       csize;
3365   PetscInt       n,i,j,start,end;
3366   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3367   MPI_Comm       comm;
3368 
3369   PetscFunctionBegin;
3370   /* If isrow has same processor distribution as mat,
3371      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3372   if (call == MAT_REUSE_MATRIX) {
3373     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3374     if (isrow_d) {
3375       sameRowDist  = PETSC_TRUE;
3376       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3377     } else {
3378       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3379       if (iscol_local) {
3380         sameRowDist  = PETSC_TRUE;
3381         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3382       }
3383     }
3384   } else {
3385     /* Check if isrow has same processor distribution as mat */
3386     sameDist[0] = PETSC_FALSE;
3387     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3388     if (!n) {
3389       sameDist[0] = PETSC_TRUE;
3390     } else {
3391       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3392       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3393       if (i >= start && j < end) {
3394         sameDist[0] = PETSC_TRUE;
3395       }
3396     }
3397 
3398     /* Check if iscol has same processor distribution as mat */
3399     sameDist[1] = PETSC_FALSE;
3400     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3401     if (!n) {
3402       sameDist[1] = PETSC_TRUE;
3403     } else {
3404       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3405       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3406       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3407     }
3408 
3409     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3410     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3411     sameRowDist = tsameDist[0];
3412   }
3413 
3414   if (sameRowDist) {
3415     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3416       /* isrow and iscol have same processor distribution as mat */
3417       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3418       PetscFunctionReturn(0);
3419     } else { /* sameRowDist */
3420       /* isrow has same processor distribution as mat */
3421       if (call == MAT_INITIAL_MATRIX) {
3422         PetscBool sorted;
3423         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3424         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3425         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3426         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3427 
3428         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3429         if (sorted) {
3430           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3431           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3432           PetscFunctionReturn(0);
3433         }
3434       } else { /* call == MAT_REUSE_MATRIX */
3435         IS    iscol_sub;
3436         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3437         if (iscol_sub) {
3438           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3439           PetscFunctionReturn(0);
3440         }
3441       }
3442     }
3443   }
3444 
3445   /* General case: iscol -> iscol_local which has global size of iscol */
3446   if (call == MAT_REUSE_MATRIX) {
3447     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3448     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3449   } else {
3450     if (!iscol_local) {
3451       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3452     }
3453   }
3454 
3455   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3456   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3457 
3458   if (call == MAT_INITIAL_MATRIX) {
3459     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3460     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3461   }
3462   PetscFunctionReturn(0);
3463 }
3464 
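/*
   A hedged usage sketch of the interface call that lands in the routine above (mat,
   isrow, iscol and submat are assumed; here the index sets simply select the locally
   owned rows and columns, so the "same distribution" fast paths apply):

     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&submat);CHKERRQ(ierr);
*/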
3465 /*@C
3466      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3467          and "off-diagonal" part of the matrix in CSR format.
3468 
3469    Collective on MPI_Comm
3470 
3471    Input Parameters:
3472 +  comm - MPI communicator
3473 .  A - "diagonal" portion of matrix
3474 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3475 -  garray - global index of B columns
3476 
3477    Output Parameter:
3478 .   mat - the matrix, with input A as its local diagonal matrix
3479    Level: advanced
3480 
3481    Notes:
3482        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3483        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3484 
3485 .seealso: MatCreateMPIAIJWithSplitArrays()
3486 @*/
3487 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3488 {
3489   PetscErrorCode ierr;
3490   Mat_MPIAIJ     *maij;
3491   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3492   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3493   PetscScalar    *oa=b->a;
3494   Mat            Bnew;
3495   PetscInt       m,n,N;
3496 
3497   PetscFunctionBegin;
3498   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3499   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3500   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3501   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3502   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3503   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3504 
3505   /* Get global columns of mat */
3506   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3507 
3508   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3509   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3510   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3511   maij = (Mat_MPIAIJ*)(*mat)->data;
3512 
3513   (*mat)->preallocated = PETSC_TRUE;
3514 
3515   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3516   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3517 
3518   /* Set A as diagonal portion of *mat */
3519   maij->A = A;
3520 
3521   nz = oi[m];
3522   for (i=0; i<nz; i++) {
3523     col   = oj[i];
3524     oj[i] = garray[col];
3525   }
3526 
3527    /* Set Bnew as off-diagonal portion of *mat */
3528   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3529   bnew        = (Mat_SeqAIJ*)Bnew->data;
3530   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3531   maij->B     = Bnew;
3532 
3533   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3534 
3535   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3536   b->free_a       = PETSC_FALSE;
3537   b->free_ij      = PETSC_FALSE;
3538   ierr = MatDestroy(&B);CHKERRQ(ierr);
3539 
3540   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3541   bnew->free_a       = PETSC_TRUE;
3542   bnew->free_ij      = PETSC_TRUE;
3543 
3544   /* condense columns of maij->B */
3545   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3546   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3547   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3548   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3549   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3550   PetscFunctionReturn(0);
3551 }
3552 
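/*
   A hedged sketch of how the routine above is driven internally (see its use in
   MatCreateSubMatrix_MPIAIJ_SameRowColDist()): Asub and Bsub are sequential AIJ
   matrices holding the local "diagonal" and "off-diagonal" parts, and garray maps
   Bsub's columns to global column indices; both sequential matrices are consumed by
   the call and must not be used or destroyed by the caller afterwards.

     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
*/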
3553 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3554 
3555 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3556 {
3557   PetscErrorCode ierr;
3558   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3559   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3560   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3561   Mat            M,Msub,B=a->B;
3562   MatScalar      *aa;
3563   Mat_SeqAIJ     *aij;
3564   PetscInt       *garray = a->garray,*colsub,Ncols;
3565   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3566   IS             iscol_sub,iscmap;
3567   const PetscInt *is_idx,*cmap;
3568   PetscBool      allcolumns=PETSC_FALSE;
3569   MPI_Comm       comm;
3570 
3571   PetscFunctionBegin;
3572   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3573 
3574   if (call == MAT_REUSE_MATRIX) {
3575     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3576     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3577     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3578 
3579     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3580     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3581 
3582     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3583     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3584 
3585     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3586 
3587   } else { /* call == MAT_INITIAL_MATRIX */
3588     PetscBool flg;
3589 
3590     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3591     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3592 
3593     /* (1) iscol -> nonscalable iscol_local */
3594     /* Check for special case: each processor gets entire matrix columns */
3595     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3596     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3597     if (allcolumns) {
3598       iscol_sub = iscol_local;
3599       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3600       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3601 
3602     } else {
3603       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local to be sorted; it may contain duplicate indices */
3604       PetscInt *idx,*cmap1,k;
3605       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3606       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3607       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3608       count = 0;
3609       k     = 0;
3610       for (i=0; i<Ncols; i++) {
3611         j = is_idx[i];
3612         if (j >= cstart && j < cend) {
3613           /* diagonal part of mat */
3614           idx[count]     = j;
3615           cmap1[count++] = i; /* column index in submat */
3616         } else if (Bn) {
3617           /* off-diagonal part of mat */
3618           if (j == garray[k]) {
3619             idx[count]     = j;
3620             cmap1[count++] = i;  /* column index in submat */
3621           } else if (j > garray[k]) {
3622             while (j > garray[k] && k < Bn-1) k++;
3623             if (j == garray[k]) {
3624               idx[count]     = j;
3625               cmap1[count++] = i; /* column index in submat */
3626             }
3627           }
3628         }
3629       }
3630       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3631 
3632       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3633       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3634       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3635 
3636       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3637     }
3638 
3639     /* (3) Create sequential Msub */
3640     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3641   }
3642 
3643   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3644   aij  = (Mat_SeqAIJ*)(Msub)->data;
3645   ii   = aij->i;
3646   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3647 
3648   /*
3649       m - number of local rows
3650       Ncols - number of columns (same on all processors)
3651       rstart - first row in new global matrix generated
3652   */
3653   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3654 
3655   if (call == MAT_INITIAL_MATRIX) {
3656     /* (4) Create parallel newmat */
3657     PetscMPIInt    rank,size;
3658     PetscInt       csize;
3659 
3660     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3661     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3662 
3663     /*
3664         Determine the number of non-zeros in the diagonal and off-diagonal
3665         portions of the matrix in order to do correct preallocation
3666     */
3667 
3668     /* first get start and end of "diagonal" columns */
3669     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3670     if (csize == PETSC_DECIDE) {
3671       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3672       if (mglobal == Ncols) { /* square matrix */
3673         nlocal = m;
3674       } else {
3675         nlocal = Ncols/size + ((Ncols % size) > rank);
3676       }
3677     } else {
3678       nlocal = csize;
3679     }
3680     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3681     rstart = rend - nlocal;
3682     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3683 
3684     /* next, compute all the lengths */
3685     jj    = aij->j;
3686     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3687     olens = dlens + m;
3688     for (i=0; i<m; i++) {
3689       jend = ii[i+1] - ii[i];
3690       olen = 0;
3691       dlen = 0;
3692       for (j=0; j<jend; j++) {
3693         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3694         else dlen++;
3695         jj++;
3696       }
3697       olens[i] = olen;
3698       dlens[i] = dlen;
3699     }
3700 
3701     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3702     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3703 
3704     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3705     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3706     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3707     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3708     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3709     ierr = PetscFree(dlens);CHKERRQ(ierr);
3710 
3711   } else { /* call == MAT_REUSE_MATRIX */
3712     M    = *newmat;
3713     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3714     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3715     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3716     /*
3717          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3718        rather than the slower MatSetValues().
3719     */
3720     M->was_assembled = PETSC_TRUE;
3721     M->assembled     = PETSC_FALSE;
3722   }
3723 
3724   /* (5) Set values of Msub to *newmat */
3725   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3726   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3727 
3728   jj   = aij->j;
3729   aa   = aij->a;
3730   for (i=0; i<m; i++) {
3731     row = rstart + i;
3732     nz  = ii[i+1] - ii[i];
3733     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3734     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3735     jj += nz; aa += nz;
3736   }
3737   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3738 
3739   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3740   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3741 
3742   ierr = PetscFree(colsub);CHKERRQ(ierr);
3743 
3744   /* save Msub, iscol_sub and iscmap used in processor for next request */
3745   if (call ==  MAT_INITIAL_MATRIX) {
3746     *newmat = M;
3747     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3748     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3749 
3750     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3751     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3752 
3753     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3754     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3755 
3756     if (iscol_local) {
3757       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3758       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3759     }
3760   }
3761   PetscFunctionReturn(0);
3762 }
3763 
3764 /*
3765     Not great since it makes two copies of the submatrix: first a SeqAIJ on each
3766   process, and then the final result by concatenating the local matrices.
3767   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3768 
3769   Note: This requires a sequential iscol with all indices.
3770 */
3771 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3772 {
3773   PetscErrorCode ierr;
3774   PetscMPIInt    rank,size;
3775   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3776   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3777   Mat            M,Mreuse;
3778   MatScalar      *aa,*vwork;
3779   MPI_Comm       comm;
3780   Mat_SeqAIJ     *aij;
3781   PetscBool      colflag,allcolumns=PETSC_FALSE;
3782 
3783   PetscFunctionBegin;
3784   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3785   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3786   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3787 
3788   /* Check for special case: each processor gets entire matrix columns */
3789   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3790   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3791   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3792 
3793   if (call ==  MAT_REUSE_MATRIX) {
3794     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3795     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3796     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3797   } else {
3798     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3799   }
3800 
3801   /*
3802       m - number of local rows
3803       n - number of columns (same on all processors)
3804       rstart - first row in new global matrix generated
3805   */
3806   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3807   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3808   if (call == MAT_INITIAL_MATRIX) {
3809     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3810     ii  = aij->i;
3811     jj  = aij->j;
3812 
3813     /*
3814         Determine the number of non-zeros in the diagonal and off-diagonal
3815         portions of the matrix in order to do correct preallocation
3816     */
3817 
3818     /* first get start and end of "diagonal" columns */
3819     if (csize == PETSC_DECIDE) {
3820       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3821       if (mglobal == n) { /* square matrix */
3822         nlocal = m;
3823       } else {
3824         nlocal = n/size + ((n % size) > rank);
3825       }
3826     } else {
3827       nlocal = csize;
3828     }
3829     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3830     rstart = rend - nlocal;
3831     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3832 
3833     /* next, compute all the lengths */
3834     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3835     olens = dlens + m;
3836     for (i=0; i<m; i++) {
3837       jend = ii[i+1] - ii[i];
3838       olen = 0;
3839       dlen = 0;
3840       for (j=0; j<jend; j++) {
3841         if (*jj < rstart || *jj >= rend) olen++;
3842         else dlen++;
3843         jj++;
3844       }
3845       olens[i] = olen;
3846       dlens[i] = dlen;
3847     }
3848     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3849     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3850     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3851     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3852     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3853     ierr = PetscFree(dlens);CHKERRQ(ierr);
3854   } else {
3855     PetscInt ml,nl;
3856 
3857     M    = *newmat;
3858     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3859     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3860     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3861     /*
3862          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3863        rather than the slower MatSetValues().
3864     */
3865     M->was_assembled = PETSC_TRUE;
3866     M->assembled     = PETSC_FALSE;
3867   }
3868   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3869   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3870   ii   = aij->i;
3871   jj   = aij->j;
3872   aa   = aij->a;
3873   for (i=0; i<m; i++) {
3874     row   = rstart + i;
3875     nz    = ii[i+1] - ii[i];
3876     cwork = jj;     jj += nz;
3877     vwork = aa;     aa += nz;
3878     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3879   }
3880 
3881   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3882   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3883   *newmat = M;
3884 
3885   /* save the submatrix on this process for the next reuse request */
3886   if (call ==  MAT_INITIAL_MATRIX) {
3887     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3888     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3889   }
3890   PetscFunctionReturn(0);
3891 }
3892 
3893 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3894 {
3895   PetscInt       m,cstart, cend,j,nnz,i,d;
3896   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3897   const PetscInt *JJ;
3898   PetscScalar    *values;
3899   PetscErrorCode ierr;
3900   PetscBool      nooffprocentries;
3901 
3902   PetscFunctionBegin;
3903   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3904 
3905   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3906   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3907   m      = B->rmap->n;
3908   cstart = B->cmap->rstart;
3909   cend   = B->cmap->rend;
3910   rstart = B->rmap->rstart;
3911 
3912   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3913 
3914 #if defined(PETSC_USE_DEBUG)
3915   for (i=0; i<m && Ii; i++) {
3916     nnz = Ii[i+1]- Ii[i];
3917     JJ  = J + Ii[i];
3918     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3919     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3920     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3921   }
3922 #endif
3923 
3924   for (i=0; i<m && Ii; i++) {
3925     nnz     = Ii[i+1]- Ii[i];
3926     JJ      = J + Ii[i];
3927     nnz_max = PetscMax(nnz_max,nnz);
3928     d       = 0;
3929     for (j=0; j<nnz; j++) {
3930       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3931     }
3932     d_nnz[i] = d;
3933     o_nnz[i] = nnz - d;
3934   }
3935   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3936   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3937 
3938   if (v) values = (PetscScalar*)v;
3939   else {
3940     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3941   }
3942 
3943   for (i=0; i<m && Ii; i++) {
3944     ii   = i + rstart;
3945     nnz  = Ii[i+1]- Ii[i];
3946     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3947   }
3948   nooffprocentries    = B->nooffprocentries;
3949   B->nooffprocentries = PETSC_TRUE;
3950   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3951   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3952   B->nooffprocentries = nooffprocentries;
3953 
3954   if (!v) {
3955     ierr = PetscFree(values);CHKERRQ(ierr);
3956   }
3957   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3958   PetscFunctionReturn(0);
3959 }
3960 
3961 /*@
3962    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3963    (the default parallel PETSc format).
3964 
3965    Collective on MPI_Comm
3966 
3967    Input Parameters:
3968 +  B - the matrix
3969 .  i - the indices into j for the start of each local row (starts with zero)
3970 .  j - the column indices for each local row (starts with zero)
3971 -  v - optional values in the matrix
3972 
3973    Level: developer
3974 
3975    Notes:
3976        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3977      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3978      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3979 
3980        The i and j indices are 0 based, and the values in i are offsets into the local j (and v) arrays.
3981 
3982        The format used for the sparse matrix input is equivalent to a
3983     row-major ordering, i.e. for the following matrix, the expected input data is
3984     as shown below:
3985 
3986 $        1 0 0
3987 $        2 0 3     P0
3988 $       -------
3989 $        4 5 6     P1
3990 $
3991 $     Process0 [P0]: rows_owned=[0,1]
3992 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3993 $        j =  {0,0,2}  [size = 3]
3994 $        v =  {1,2,3}  [size = 3]
3995 $
3996 $     Process1 [P1]: rows_owned=[2]
3997 $        i =  {0,3}    [size = nrow+1  = 1+1]
3998 $        j =  {0,1,2}  [size = 3]
3999 $        v =  {4,5,6}  [size = 3]
4000 
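       For instance, process 0 in the layout above could pass its arrays directly
     (a minimal sketch; the matrix B is assumed to already have its type and local sizes set):
$
$     PetscInt    i[] = {0,1,3};
$     PetscInt    j[] = {0,0,2};
$     PetscScalar v[] = {1.0,2.0,3.0};
$     MatMPIAIJSetPreallocationCSR(B,i,j,v);
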
4001 .keywords: matrix, aij, compressed row, sparse, parallel
4002 
4003 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4004           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4005 @*/
4006 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4007 {
4008   PetscErrorCode ierr;
4009 
4010   PetscFunctionBegin;
4011   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4012   PetscFunctionReturn(0);
4013 }
4014 
4015 /*@C
4016    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4017    (the default parallel PETSc format).  For good matrix assembly performance
4018    the user should preallocate the matrix storage by setting the parameters
4019    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4020    performance can be increased by more than a factor of 50.
4021 
4022    Collective on MPI_Comm
4023 
4024    Input Parameters:
4025 +  B - the matrix
4026 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4027            (same value is used for all local rows)
4028 .  d_nnz - array containing the number of nonzeros in the various rows of the
4029            DIAGONAL portion of the local submatrix (possibly different for each row)
4030            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4031            The size of this array is equal to the number of local rows, i.e 'm'.
4032            For matrices that will be factored, you must leave room for (and set)
4033            the diagonal entry even if it is zero.
4034 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4035            submatrix (same value is used for all local rows).
4036 -  o_nnz - array containing the number of nonzeros in the various rows of the
4037            OFF-DIAGONAL portion of the local submatrix (possibly different for
4038            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4039            structure. The size of this array is equal to the number
4040            of local rows, i.e 'm'.
4041 
4042    If the *_nnz parameter is given then the *_nz parameter is ignored
4043 
4044    The AIJ format (also called the Yale sparse matrix format or
4045    compressed row storage (CSR)) is fully compatible with standard Fortran 77
4046    storage.  The stored row and column indices begin with zero.
4047    See Users-Manual: ch_mat for details.
4048 
4049    The parallel matrix is partitioned such that the first m0 rows belong to
4050    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4051    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
4052 
4053    The DIAGONAL portion of the local submatrix of a processor can be defined
4054    as the submatrix which is obtained by extracting the part corresponding to
4055    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4056    first row that belongs to the processor, r2 is the last row belonging to
4057    this processor, and c1-c2 is the range of indices of the local part of a
4058    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4059    common case of a square matrix, the row and column ranges are the same and
4060    the DIAGONAL part is also square. The remaining portion of the local
4061    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4062 
4063    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4064 
4065    You can call MatGetInfo() to get information on how effective the preallocation was;
4066    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4067    You can also run with the option -info and look for messages with the string
4068    malloc in them to see if additional memory allocation was needed.
4069 
4070    Example usage:
4071 
4072    Consider the following 8x8 matrix with 34 non-zero values, that is
4073    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4074    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4075    as follows:
4076 
4077 .vb
4078             1  2  0  |  0  3  0  |  0  4
4079     Proc0   0  5  6  |  7  0  0  |  8  0
4080             9  0 10  | 11  0  0  | 12  0
4081     -------------------------------------
4082            13  0 14  | 15 16 17  |  0  0
4083     Proc1   0 18  0  | 19 20 21  |  0  0
4084             0  0  0  | 22 23  0  | 24  0
4085     -------------------------------------
4086     Proc2  25 26 27  |  0  0 28  | 29  0
4087            30  0  0  | 31 32 33  |  0 34
4088 .ve
4089 
4090    This can be represented as a collection of submatrices as:
4091 
4092 .vb
4093       A B C
4094       D E F
4095       G H I
4096 .ve
4097 
4098    Where the submatrices A,B,C are owned by proc0, D,E,F are
4099    owned by proc1, G,H,I are owned by proc2.
4100 
4101    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4102    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4103    The 'M','N' parameters are 8,8, and have the same values on all procs.
4104 
4105    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4106    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4107    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4108    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4109    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4110    matrix, and [DF] as another SeqAIJ matrix.
4111 
4112    When d_nz, o_nz parameters are specified, d_nz storage elements are
4113    allocated for every row of the local diagonal submatrix, and o_nz
4114    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4115    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4116    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4117    In this case, the values of d_nz,o_nz are:
4118 .vb
4119      proc0 : dnz = 2, o_nz = 2
4120      proc1 : dnz = 3, o_nz = 2
4121      proc2 : dnz = 1, o_nz = 4
4122 .ve
4123    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4124    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4125    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4126    34 values.
4127 
4128    When d_nnz, o_nnz parameters are specified, the storage is specified
4129    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4130    In the above case the values for d_nnz,o_nnz are:
4131 .vb
4132      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4133      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4134      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4135 .ve
4136    Here the space allocated is the sum of all the above values, i.e. 34, and
4137    hence pre-allocation is perfect.
4138 
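   For instance, with the per-row counts listed above, proc0 could request its exact
   preallocation as follows (a sketch for proc0 only; the matrix B is assumed to already
   have type MATMPIAIJ and local sizes 3 x 3):
.vb
     PetscInt d_nnz[] = {2,2,2};
     PetscInt o_nnz[] = {2,2,2};
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve
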
4139    Level: intermediate
4140 
4141 .keywords: matrix, aij, compressed row, sparse, parallel
4142 
4143 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4144           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4145 @*/
4146 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4147 {
4148   PetscErrorCode ierr;
4149 
4150   PetscFunctionBegin;
4151   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4152   PetscValidType(B,1);
4153   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4154   PetscFunctionReturn(0);
4155 }
4156 
4157 /*@
4158      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4159          CSR format the local rows.
4160 
4161    Collective on MPI_Comm
4162 
4163    Input Parameters:
4164 +  comm - MPI communicator
4165 .  m - number of local rows (Cannot be PETSC_DECIDE)
4166 .  n - This value should be the same as the local size used in creating the
4167        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4168        calculated if N is given) For square matrices n is almost always m.
4169 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4170 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4171 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4172 .   j - column indices
4173 -   a - matrix values
4174 
4175    Output Parameter:
4176 .   mat - the matrix
4177 
4178    Level: intermediate
4179 
4180    Notes:
4181        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4182      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4183      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4184 
4185        The i and j indices are 0 based, and the values in i are offsets into the local j (and a) arrays.
4186 
4187        The format used for the sparse matrix input is equivalent to a
4188     row-major ordering, i.e. for the following matrix, the expected input data is
4189     as shown below:
4190 
4191 $        1 0 0
4192 $        2 0 3     P0
4193 $       -------
4194 $        4 5 6     P1
4195 $
4196 $     Process0 [P0]: rows_owned=[0,1]
4197 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4198 $        j =  {0,0,2}  [size = 3]
4199 $        v =  {1,2,3}  [size = 3]
4200 $
4201 $     Process1 [P1]: rows_owned=[2]
4202 $        i =  {0,3}    [size = nrow+1  = 1+1]
4203 $        j =  {0,1,2}  [size = 3]
4204 $        v =  {4,5,6}  [size = 3]
4205 
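       For the layout above, process 0 could create the matrix as follows (a sketch shown
     for process 0 only; each process passes its own i, j, and a arrays):
$
$     PetscInt    i[] = {0,1,3};
$     PetscInt    j[] = {0,0,2};
$     PetscScalar a[] = {1.0,2.0,3.0};
$     Mat         A;
$     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);
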
4206 .keywords: matrix, aij, compressed row, sparse, parallel
4207 
4208 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4209           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4210 @*/
4211 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4212 {
4213   PetscErrorCode ierr;
4214 
4215   PetscFunctionBegin;
4216   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4217   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4218   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4219   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4220   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4221   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4222   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4223   PetscFunctionReturn(0);
4224 }
4225 
4226 /*@C
4227    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4228    (the default parallel PETSc format).  For good matrix assembly performance
4229    the user should preallocate the matrix storage by setting the parameters
4230    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4231    performance can be increased by more than a factor of 50.
4232 
4233    Collective on MPI_Comm
4234 
4235    Input Parameters:
4236 +  comm - MPI communicator
4237 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4238            This value should be the same as the local size used in creating the
4239            y vector for the matrix-vector product y = Ax.
4240 .  n - This value should be the same as the local size used in creating the
4241        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4242        calculated if N is given) For square matrices n is almost always m.
4243 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4244 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4245 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4246            (same value is used for all local rows)
4247 .  d_nnz - array containing the number of nonzeros in the various rows of the
4248            DIAGONAL portion of the local submatrix (possibly different for each row)
4249            or NULL, if d_nz is used to specify the nonzero structure.
4250            The size of this array is equal to the number of local rows, i.e 'm'.
4251 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4252            submatrix (same value is used for all local rows).
4253 -  o_nnz - array containing the number of nonzeros in the various rows of the
4254            OFF-DIAGONAL portion of the local submatrix (possibly different for
4255            each row) or NULL, if o_nz is used to specify the nonzero
4256            structure. The size of this array is equal to the number
4257            of local rows, i.e 'm'.
4258 
4259    Output Parameter:
4260 .  A - the matrix
4261 
4262    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4263    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4264    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4265 
4266    Notes:
4267    If the *_nnz parameter is given then the *_nz parameter is ignored
4268 
4269    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4270    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4271    storage requirements for this matrix.
4272 
4273    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4274    processor than it must be used on all processors that share the object for
4275    that argument.
4276 
4277    The user MUST specify either the local or global matrix dimensions
4278    (possibly both).
4279 
4280    The parallel matrix is partitioned across processors such that the
4281    first m0 rows belong to process 0, the next m1 rows belong to
4282    process 1, the next m2 rows belong to process 2, etc., where
4283    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4284    values corresponding to an [m x N] submatrix.
4285 
4286    The columns are logically partitioned with the n0 columns belonging
4287    to the 0th partition, the next n1 columns belonging to the next
4288    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4289 
4290    The DIAGONAL portion of the local submatrix on any given processor
4291    is the submatrix corresponding to the rows and columns m,n
4292    owned by the given processor, i.e. the diagonal matrix on
4293    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4294    etc. The remaining portion of the local submatrix [m x (N-n)]
4295    constitutes the OFF-DIAGONAL portion. The example below better
4296    illustrates this concept.
4297 
4298    For a square global matrix we define each processor's diagonal portion
4299    to be its local rows and the corresponding columns (a square submatrix);
4300    each processor's off-diagonal portion encompasses the remainder of the
4301    local matrix (a rectangular submatrix).
4302 
4303    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4304 
4305    When calling this routine with a single process communicator, a matrix of
4306    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4307    type of communicator, use the construction mechanism
4308 .vb
4309      MatCreate(...,&A);
4310      MatSetType(A,MATMPIAIJ);
4311      MatSetSizes(A, m,n,M,N);
4312      MatMPIAIJSetPreallocation(A,...);
4313 .ve
4316 
4317    By default, this format uses inodes (identical nodes) when possible.
4318    We search for consecutive rows with the same nonzero structure, thereby
4319    reusing matrix information to achieve increased efficiency.
4320 
4321    Options Database Keys:
4322 +  -mat_no_inode  - Do not use inodes
4323 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4324 
4327    Example usage:
4328 
4329    Consider the following 8x8 matrix with 34 non-zero values, that is
4330    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4331    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4332    as follows
4333 
4334 .vb
4335             1  2  0  |  0  3  0  |  0  4
4336     Proc0   0  5  6  |  7  0  0  |  8  0
4337             9  0 10  | 11  0  0  | 12  0
4338     -------------------------------------
4339            13  0 14  | 15 16 17  |  0  0
4340     Proc1   0 18  0  | 19 20 21  |  0  0
4341             0  0  0  | 22 23  0  | 24  0
4342     -------------------------------------
4343     Proc2  25 26 27  |  0  0 28  | 29  0
4344            30  0  0  | 31 32 33  |  0 34
4345 .ve
4346 
4347    This can be represented as a collection of submatrices as
4348 
4349 .vb
4350       A B C
4351       D E F
4352       G H I
4353 .ve
4354 
4355    Where the submatrices A,B,C are owned by proc0, D,E,F are
4356    owned by proc1, G,H,I are owned by proc2.
4357 
4358    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4359    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4360    The 'M','N' parameters are 8,8, and have the same values on all procs.
4361 
4362    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4363    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4364    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4365    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4366    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4367    matrix, and [DF] as another SeqAIJ matrix.
4368 
4369    When d_nz, o_nz parameters are specified, d_nz storage elements are
4370    allocated for every row of the local diagonal submatrix, and o_nz
4371    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4372    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4373    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4374    In this case, the values of d_nz,o_nz are
4375 .vb
4376      proc0 : dnz = 2, o_nz = 2
4377      proc1 : dnz = 3, o_nz = 2
4378      proc2 : dnz = 1, o_nz = 4
4379 .ve
4380    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4381    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4382    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4383    34 values.
4384 
4385    When d_nnz, o_nnz parameters are specified, the storage is specified
4386    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4387    In the above case the values for d_nnz,o_nnz are
4388 .vb
4389      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4390      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4391      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4392 .ve
4393    Here the space allocated is the sum of all the above values, i.e. 34, and
4394    hence pre-allocation is perfect.
4395 
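   For instance, proc0 in the example above could create its share of the 8x8 matrix
   as follows (a sketch using the exact per-row counts listed above):
.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     Mat      A;
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
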
4396    Level: intermediate
4397 
4398 .keywords: matrix, aij, compressed row, sparse, parallel
4399 
4400 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4401           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4402 @*/
4403 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4404 {
4405   PetscErrorCode ierr;
4406   PetscMPIInt    size;
4407 
4408   PetscFunctionBegin;
4409   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4410   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4411   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4412   if (size > 1) {
4413     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4414     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4415   } else {
4416     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4417     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4418   }
4419   PetscFunctionReturn(0);
4420 }
4421 
4422 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4423 {
4424   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4425   PetscBool      flg;
4426   PetscErrorCode ierr;
4427 
4428   PetscFunctionBegin;
4429   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4430   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4431   if (Ad)     *Ad     = a->A;
4432   if (Ao)     *Ao     = a->B;
4433   if (colmap) *colmap = a->garray;
4434   PetscFunctionReturn(0);
4435 }
4436 
4437 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4438 {
4439   PetscErrorCode ierr;
4440   PetscInt       m,N,i,rstart,nnz,Ii;
4441   PetscInt       *indx;
4442   PetscScalar    *values;
4443 
4444   PetscFunctionBegin;
4445   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4446   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4447     PetscInt       *dnz,*onz,sum,bs,cbs;
4448 
4449     if (n == PETSC_DECIDE) {
4450       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4451     }
4452     /* Check sum(n) = N */
4453     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4454     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4455 
4456     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4457     rstart -= m;
4458 
4459     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4460     for (i=0; i<m; i++) {
4461       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4462       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4463       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4464     }
4465 
4466     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4467     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4468     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4469     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4470     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4471     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4472     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4473     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4474   }
4475 
4476   /* numeric phase */
4477   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4478   for (i=0; i<m; i++) {
4479     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4480     Ii   = i + rstart;
4481     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4482     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4483   }
4484   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4485   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4486   PetscFunctionReturn(0);
4487 }
4488 
4489 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4490 {
4491   PetscErrorCode    ierr;
4492   PetscMPIInt       rank;
4493   PetscInt          m,N,i,rstart,nnz;
4494   size_t            len;
4495   const PetscInt    *indx;
4496   PetscViewer       out;
4497   char              *name;
4498   Mat               B;
4499   const PetscScalar *values;
4500 
4501   PetscFunctionBegin;
4502   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4503   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4504   /* Should this be the type of the diagonal block of A? */
4505   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4506   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4507   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4508   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4509   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4510   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4511   for (i=0; i<m; i++) {
4512     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4513     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4514     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4515   }
4516   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4517   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4518 
4519   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4520   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4521   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4522   sprintf(name,"%s.%d",outfile,rank);
4523   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4524   ierr = PetscFree(name);CHKERRQ(ierr);
4525   ierr = MatView(B,out);CHKERRQ(ierr);
4526   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4527   ierr = MatDestroy(&B);CHKERRQ(ierr);
4528   PetscFunctionReturn(0);
4529 }
4530 
4531 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4532 {
4533   PetscErrorCode      ierr;
4534   Mat_Merge_SeqsToMPI *merge;
4535   PetscContainer      container;
4536 
4537   PetscFunctionBegin;
4538   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4539   if (container) {
4540     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4541     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4542     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4543     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4544     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4545     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4546     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4547     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4548     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4549     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4550     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4551     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4552     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4553     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4554     ierr = PetscFree(merge);CHKERRQ(ierr);
4555     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4556   }
4557   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4558   PetscFunctionReturn(0);
4559 }
4560 
4561 #include <../src/mat/utils/freespace.h>
4562 #include <petscbt.h>
4563 
4564 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4565 {
4566   PetscErrorCode      ierr;
4567   MPI_Comm            comm;
4568   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4569   PetscMPIInt         size,rank,taga,*len_s;
4570   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4571   PetscInt            proc,m;
4572   PetscInt            **buf_ri,**buf_rj;
4573   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4574   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4575   MPI_Request         *s_waits,*r_waits;
4576   MPI_Status          *status;
4577   MatScalar           *aa=a->a;
4578   MatScalar           **abuf_r,*ba_i;
4579   Mat_Merge_SeqsToMPI *merge;
4580   PetscContainer      container;
4581 
4582   PetscFunctionBegin;
4583   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4584   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4585 
4586   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4587   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4588 
4589   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4590   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4591 
4592   bi     = merge->bi;
4593   bj     = merge->bj;
4594   buf_ri = merge->buf_ri;
4595   buf_rj = merge->buf_rj;
4596 
4597   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4598   owners = merge->rowmap->range;
4599   len_s  = merge->len_s;
4600 
4601   /* send and recv matrix values */
4602   /*-----------------------------*/
4603   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4604   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4605 
4606   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4607   for (proc=0,k=0; proc<size; proc++) {
4608     if (!len_s[proc]) continue;
4609     i    = owners[proc];
4610     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4611     k++;
4612   }
4613 
4614   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4615   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4616   ierr = PetscFree(status);CHKERRQ(ierr);
4617 
4618   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4619   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4620 
4621   /* insert mat values of mpimat */
4622   /*----------------------------*/
4623   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4624   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4625 
4626   for (k=0; k<merge->nrecv; k++) {
4627     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4628     nrows       = *(buf_ri_k[k]);
4629     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4630     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4631   }
4632 
4633   /* set values of ba */
4634   m = merge->rowmap->n;
4635   for (i=0; i<m; i++) {
4636     arow = owners[rank] + i;
4637     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4638     bnzi = bi[i+1] - bi[i];
4639     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4640 
4641     /* add local non-zero vals of this proc's seqmat into ba */
4642     anzi   = ai[arow+1] - ai[arow];
4643     aj     = a->j + ai[arow];
4644     aa     = a->a + ai[arow];
4645     nextaj = 0;
4646     for (j=0; nextaj<anzi; j++) {
4647       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4648         ba_i[j] += aa[nextaj++];
4649       }
4650     }
4651 
4652     /* add received vals into ba */
4653     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4654       /* i-th row */
4655       if (i == *nextrow[k]) {
4656         anzi   = *(nextai[k]+1) - *nextai[k];
4657         aj     = buf_rj[k] + *(nextai[k]);
4658         aa     = abuf_r[k] + *(nextai[k]);
4659         nextaj = 0;
4660         for (j=0; nextaj<anzi; j++) {
4661           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4662             ba_i[j] += aa[nextaj++];
4663           }
4664         }
4665         nextrow[k]++; nextai[k]++;
4666       }
4667     }
4668     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4669   }
4670   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4671   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4672 
4673   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4674   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4675   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4676   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4677   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4678   PetscFunctionReturn(0);
4679 }
4680 
4681 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4682 {
4683   PetscErrorCode      ierr;
4684   Mat                 B_mpi;
4685   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4686   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4687   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4688   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4689   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4690   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4691   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4692   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4693   MPI_Status          *status;
4694   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4695   PetscBT             lnkbt;
4696   Mat_Merge_SeqsToMPI *merge;
4697   PetscContainer      container;
4698 
4699   PetscFunctionBegin;
4700   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4701 
4702   /* make sure it is a PETSc comm */
4703   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4704   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4705   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4706 
4707   ierr = PetscNew(&merge);CHKERRQ(ierr);
4708   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4709 
4710   /* determine row ownership */
4711   /*---------------------------------------------------------*/
4712   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4713   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4714   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4715   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4716   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4717   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4718   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4719 
4720   m      = merge->rowmap->n;
4721   owners = merge->rowmap->range;
4722 
4723   /* determine the number of messages to send, their lengths */
4724   /*---------------------------------------------------------*/
4725   len_s = merge->len_s;
4726 
4727   len          = 0; /* length of buf_si[] */
4728   merge->nsend = 0;
4729   for (proc=0; proc<size; proc++) {
4730     len_si[proc] = 0;
4731     if (proc == rank) {
4732       len_s[proc] = 0;
4733     } else {
4734       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4735       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4736     }
4737     if (len_s[proc]) {
4738       merge->nsend++;
4739       nrows = 0;
4740       for (i=owners[proc]; i<owners[proc+1]; i++) {
4741         if (ai[i+1] > ai[i]) nrows++;
4742       }
4743       len_si[proc] = 2*(nrows+1);
4744       len         += len_si[proc];
4745     }
4746   }
4747 
4748   /* determine the number and length of messages to receive for ij-structure */
4749   /*-------------------------------------------------------------------------*/
4750   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4751   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4752 
4753   /* post the Irecv of j-structure */
4754   /*-------------------------------*/
4755   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4756   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4757 
4758   /* post the Isend of j-structure */
4759   /*--------------------------------*/
4760   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4761 
4762   for (proc=0, k=0; proc<size; proc++) {
4763     if (!len_s[proc]) continue;
4764     i    = owners[proc];
4765     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4766     k++;
4767   }
4768 
4769   /* receives and sends of j-structure are complete */
4770   /*------------------------------------------------*/
4771   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4772   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4773 
4774   /* send and recv i-structure */
4775   /*---------------------------*/
4776   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4777   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4778 
4779   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4780   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4781   for (proc=0,k=0; proc<size; proc++) {
4782     if (!len_s[proc]) continue;
4783     /* form outgoing message for i-structure:
4784          buf_si[0]:                 nrows to be sent
4785                [1:nrows]:           row index (global)
4786                [nrows+1:2*nrows+1]: i-structure index
4787     */
4788     /*-------------------------------------------*/
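    /* For example, if this rank sends two nonempty rows with local indices 1 and 3,
       holding 2 and 5 entries respectively, then buf_si is {2, 1, 3, 0, 2, 7}
       (a hypothetical illustration of the layout described above, not data from a real run) */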
4789     nrows       = len_si[proc]/2 - 1;
4790     buf_si_i    = buf_si + nrows+1;
4791     buf_si[0]   = nrows;
4792     buf_si_i[0] = 0;
4793     nrows       = 0;
4794     for (i=owners[proc]; i<owners[proc+1]; i++) {
4795       anzi = ai[i+1] - ai[i];
4796       if (anzi) {
4797         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4798         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4799         nrows++;
4800       }
4801     }
4802     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4803     k++;
4804     buf_si += len_si[proc];
4805   }
4806 
4807   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4808   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4809 
4810   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4811   for (i=0; i<merge->nrecv; i++) {
4812     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4813   }
4814 
4815   ierr = PetscFree(len_si);CHKERRQ(ierr);
4816   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4817   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4818   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4819   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4820   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4821   ierr = PetscFree(status);CHKERRQ(ierr);
4822 
4823   /* compute a local seq matrix in each processor */
4824   /*----------------------------------------------*/
4825   /* allocate bi array and free space for accumulating nonzero column info */
4826   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4827   bi[0] = 0;
4828 
4829   /* create and initialize a linked list */
4830   nlnk = N+1;
4831   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4832 
4833   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4834   len  = ai[owners[rank+1]] - ai[owners[rank]];
4835   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4836 
4837   current_space = free_space;
4838 
4839   /* determine symbolic info for each local row */
4840   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4841 
4842   for (k=0; k<merge->nrecv; k++) {
4843     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4844     nrows       = *buf_ri_k[k];
4845     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4846     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4847   }
4848 
4849   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4850   len  = 0;
4851   for (i=0; i<m; i++) {
4852     bnzi = 0;
4853     /* add local non-zero cols of this proc's seqmat into lnk */
4854     arow  = owners[rank] + i;
4855     anzi  = ai[arow+1] - ai[arow];
4856     aj    = a->j + ai[arow];
4857     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4858     bnzi += nlnk;
4859     /* add received col data into lnk */
4860     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4861       if (i == *nextrow[k]) { /* i-th row */
4862         anzi  = *(nextai[k]+1) - *nextai[k];
4863         aj    = buf_rj[k] + *nextai[k];
4864         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4865         bnzi += nlnk;
4866         nextrow[k]++; nextai[k]++;
4867       }
4868     }
4869     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4870 
4871     /* if free space is not available, make more free space */
4872     if (current_space->local_remaining<bnzi) {
4873       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4874       nspacedouble++;
4875     }
4876     /* copy data into free space, then initialize lnk */
4877     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4878     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4879 
4880     current_space->array           += bnzi;
4881     current_space->local_used      += bnzi;
4882     current_space->local_remaining -= bnzi;
4883 
4884     bi[i+1] = bi[i] + bnzi;
4885   }
4886 
4887   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4888 
4889   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4890   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4891   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4892 
4893   /* create symbolic parallel matrix B_mpi */
4894   /*---------------------------------------*/
4895   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4896   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4897   if (n==PETSC_DECIDE) {
4898     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4899   } else {
4900     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4901   }
4902   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4903   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4904   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4905   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4906   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4907 
4908   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4909   B_mpi->assembled    = PETSC_FALSE;
4910   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4911   merge->bi           = bi;
4912   merge->bj           = bj;
4913   merge->buf_ri       = buf_ri;
4914   merge->buf_rj       = buf_rj;
4915   merge->coi          = NULL;
4916   merge->coj          = NULL;
4917   merge->owners_co    = NULL;
4918 
4919   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4920 
4921   /* attach the supporting struct to B_mpi for reuse */
4922   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4923   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4924   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4925   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4926   *mpimat = B_mpi;
4927 
4928   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4929   PetscFunctionReturn(0);
4930 }
4931 
4932 /*@C
4933       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4934                  matrices from each processor
4935 
4936     Collective on MPI_Comm
4937 
4938    Input Parameters:
4939 +    comm - the communicator the parallel matrix will live on
4940 .    seqmat - the input sequential matrix on each process
4941 .    m - number of local rows (or PETSC_DECIDE)
4942 .    n - number of local columns (or PETSC_DECIDE)
4943 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4944 
4945    Output Parameter:
4946 .    mpimat - the parallel matrix generated
4947 
4948     Level: advanced
4949 
4950    Notes:
4951      The dimensions of the sequential matrix in each processor MUST be the same.
4952      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4953      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
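
     A minimal calling sketch (assuming each rank holds a sequential matrix seqA with
     identical global dimensions):
$     Mat C;
$     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);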
4954 @*/
4955 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4956 {
4957   PetscErrorCode ierr;
4958   PetscMPIInt    size;
4959 
4960   PetscFunctionBegin;
4961   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4962   if (size == 1) {
4963     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4964     if (scall == MAT_INITIAL_MATRIX) {
4965       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4966     } else {
4967       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4968     }
4969     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4970     PetscFunctionReturn(0);
4971   }
4972   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4973   if (scall == MAT_INITIAL_MATRIX) {
4974     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4975   }
4976   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4977   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4978   PetscFunctionReturn(0);
4979 }
4980 
4981 /*@
4982      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4983           mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
4984           with MatGetSize().
4985 
4986     Not Collective
4987 
4988    Input Parameters:
4989 +    A - the matrix
4990 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4991 
4992    Output Parameter:
4993 .    A_loc - the local sequential matrix generated
4994 
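   A minimal usage sketch (A is assumed to be an assembled MATMPIAIJ matrix):
$     Mat A_loc;
$     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
$     /* ... use A_loc, then destroy it when no longer needed ... */
$     MatDestroy(&A_loc);
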
4995     Level: developer
4996 
4997 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4998 
4999 @*/
5000 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5001 {
5002   PetscErrorCode ierr;
5003   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5004   Mat_SeqAIJ     *mat,*a,*b;
5005   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5006   MatScalar      *aa,*ba,*cam;
5007   PetscScalar    *ca;
5008   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5009   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5010   PetscBool      match;
5011   MPI_Comm       comm;
5012   PetscMPIInt    size;
5013 
5014   PetscFunctionBegin;
5015   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5016   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5017   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5018   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5019   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5020 
5021   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5022   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5023   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5024   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5025   aa = a->a; ba = b->a;
5026   if (scall == MAT_INITIAL_MATRIX) {
5027     if (size == 1) {
5028       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5029       PetscFunctionReturn(0);
5030     }
5031 
5032     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5033     ci[0] = 0;
5034     for (i=0; i<am; i++) {
5035       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5036     }
5037     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5038     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5039     k    = 0;
5040     for (i=0; i<am; i++) {
5041       ncols_o = bi[i+1] - bi[i];
5042       ncols_d = ai[i+1] - ai[i];
5043       /* off-diagonal portion of A: columns before the diagonal block */
5044       for (jo=0; jo<ncols_o; jo++) {
5045         col = cmap[*bj];
5046         if (col >= cstart) break;
5047         cj[k]   = col; bj++;
5048         ca[k++] = *ba++;
5049       }
5050       /* diagonal portion of A */
5051       for (j=0; j<ncols_d; j++) {
5052         cj[k]   = cstart + *aj++;
5053         ca[k++] = *aa++;
5054       }
5055       /* off-diagonal portion of A: columns after the diagonal block */
5056       for (j=jo; j<ncols_o; j++) {
5057         cj[k]   = cmap[*bj++];
5058         ca[k++] = *ba++;
5059       }
5060     }
5061     /* put together the new matrix */
5062     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5063     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5064     /* Since these are PETSc arrays, change flags to free them as necessary. */
5065     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5066     mat->free_a  = PETSC_TRUE;
5067     mat->free_ij = PETSC_TRUE;
5068     mat->nonew   = 0;
5069   } else if (scall == MAT_REUSE_MATRIX) {
5070     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5071     ci = mat->i; cj = mat->j; cam = mat->a;
5072     for (i=0; i<am; i++) {
5073       /* off-diagonal portion of A: columns before the diagonal block */
5074       ncols_o = bi[i+1] - bi[i];
5075       for (jo=0; jo<ncols_o; jo++) {
5076         col = cmap[*bj];
5077         if (col >= cstart) break;
5078         *cam++ = *ba++; bj++;
5079       }
5080       /* diagonal portion of A */
5081       ncols_d = ai[i+1] - ai[i];
5082       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5083       /* off-diagonal portion of A: columns after the diagonal block */
5084       for (j=jo; j<ncols_o; j++) {
5085         *cam++ = *ba++; bj++;
5086       }
5087     }
5088   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5089   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5090   PetscFunctionReturn(0);
5091 }
5092 
5093 /*@C
5094      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5095 
5096     Not Collective
5097 
5098    Input Parameters:
5099 +    A - the matrix
5100 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5101 -    row, col - index sets of rows and columns to extract (or NULL)
5102 
5103    Output Parameter:
5104 .    A_loc - the local sequential matrix generated
5105 
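   Example usage (a minimal sketch; A is assumed to be an assembled MATMPIAIJ matrix, and
   passing NULL for both row and col selects all local rows and all nonzero columns):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     ... use A_loc ...
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
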
5106     Level: developer
5107 
5108 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5109 
5110 @*/
5111 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5112 {
5113   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5114   PetscErrorCode ierr;
5115   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5116   IS             isrowa,iscola;
5117   Mat            *aloc;
5118   PetscBool      match;
5119 
5120   PetscFunctionBegin;
5121   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5122   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5123   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5124   if (!row) {
5125     start = A->rmap->rstart; end = A->rmap->rend;
5126     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5127   } else {
5128     isrowa = *row;
5129   }
5130   if (!col) {
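    /* Build the condensed column index set: off-diagonal columns with global index below the
       local column ownership range, then the locally owned columns, then the remaining
       off-diagonal columns; since garray is sorted, the indices come out in increasing order. */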
5131     start = A->cmap->rstart;
5132     cmap  = a->garray;
5133     nzA   = a->A->cmap->n;
5134     nzB   = a->B->cmap->n;
5135     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5136     ncols = 0;
5137     for (i=0; i<nzB; i++) {
5138       if (cmap[i] < start) idx[ncols++] = cmap[i];
5139       else break;
5140     }
5141     imark = i;
5142     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5143     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5144     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5145   } else {
5146     iscola = *col;
5147   }
5148   if (scall != MAT_INITIAL_MATRIX) {
5149     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5150     aloc[0] = *A_loc;
5151   }
5152   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5153   if (!col) { /* attach global id of condensed columns */
5154     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5155   }
5156   *A_loc = aloc[0];
5157   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5158   if (!row) {
5159     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5160   }
5161   if (!col) {
5162     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5163   }
5164   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5165   PetscFunctionReturn(0);
5166 }
5167 
5168 /*@C
5169     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5170 
5171     Collective on Mat
5172 
5173    Input Parameters:
5174 +    A,B - the matrices in mpiaij format
5175 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5176 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5177 
5178    Output Parameters:
5179 +    rowb, colb - index sets of rows and columns of B to extract
5180 -    B_seq - the sequential matrix generated
5181 
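   Example usage (a minimal sketch; A and B are assumed to be assembled MATMPIAIJ matrices with
   compatible layouts; the index sets created by the first call are reused by the second call):
.vb
     IS  rowb = NULL,colb = NULL;
     Mat Bseq = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     ... change the numerical values of B, keeping its nonzero pattern ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&Bseq);CHKERRQ(ierr);
.ve
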
5182     Level: developer
5183 
5184 @*/
5185 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5186 {
5187   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5188   PetscErrorCode ierr;
5189   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5190   IS             isrowb,iscolb;
5191   Mat            *bseq=NULL;
5192 
5193   PetscFunctionBegin;
5194   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5195     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5196   }
5197   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5198 
5199   if (scall == MAT_INITIAL_MATRIX) {
5200     start = A->cmap->rstart;
5201     cmap  = a->garray;
5202     nzA   = a->A->cmap->n;
5203     nzB   = a->B->cmap->n;
5204     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5205     ncols = 0;
5206     for (i=0; i<nzB; i++) {  /* row < local row index */
5207       if (cmap[i] < start) idx[ncols++] = cmap[i];
5208       else break;
5209     }
5210     imark = i;
5211     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5212     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5213     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5214     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5215   } else {
5216     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5217     isrowb  = *rowb; iscolb = *colb;
5218     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5219     bseq[0] = *B_seq;
5220   }
5221   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5222   *B_seq = bseq[0];
5223   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5224   if (!rowb) {
5225     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5226   } else {
5227     *rowb = isrowb;
5228   }
5229   if (!colb) {
5230     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5231   } else {
5232     *colb = iscolb;
5233   }
5234   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5235   PetscFunctionReturn(0);
5236 }
5237 
5238 #include <petsc/private/vecscatterimpl.h>
5239 /*
5240     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5241     of the OFF-DIAGONAL portion of local A
5242 
5243     Collective on Mat
5244 
5245    Input Parameters:
5246 +    A,B - the matrices in mpiaij format
5247 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5248 
5249    Output Parameters:
5250 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5251 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5252 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5253 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5254 
5255     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5256      for this matrix. This is not desirable.
5257 
5258     Level: developer
5259 
5260 */
5261 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5262 {
5263   VecScatter_MPI_General *gen_to,*gen_from;
5264   PetscErrorCode         ierr;
5265   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5266   Mat_SeqAIJ             *b_oth;
5267   VecScatter             ctx;
5268   MPI_Comm               comm;
5269   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5270   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5271   PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
5272   PetscScalar            *b_otha,*bufa,*bufA,*vals;
5273   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5274   MPI_Request            *rwaits = NULL,*swaits = NULL;
5275   MPI_Status             *sstatus,rstatus;
5276   PetscMPIInt            jj,size;
5277   VecScatterType         type;
5278   PetscBool              mpi1;
5279 
5280   PetscFunctionBegin;
5281   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5282   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5283 
5284   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5285     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5286   }
5287   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5288   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5289 
5290   if (size == 1) {
5291     startsj_s = NULL;
5292     bufa_ptr  = NULL;
5293     *B_oth    = NULL;
5294     PetscFunctionReturn(0);
5295   }
5296 
5297   ctx = a->Mvctx;
5298   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
5299   ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
5300   if (!mpi1) {
5301     /* a->Mvctx is not of type MPI1, the only VecScatter type the Mat-Mat ops support,
5302      thus create a->Mvctx_mpi1 */
5303     if (!a->Mvctx_mpi1) {
5304       a->Mvctx_mpi1_flg = PETSC_TRUE;
5305       ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5306     }
5307     ctx = a->Mvctx_mpi1;
5308   }
5309   tag = ((PetscObject)ctx)->tag;
5310 
5311   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5312   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5313   nrecvs   = gen_from->n;
5314   nsends   = gen_to->n;
5315 
5316   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5317   srow    = gen_to->indices;    /* local row index to be sent */
5318   sstarts = gen_to->starts;
5319   sprocs  = gen_to->procs;
5320   sstatus = gen_to->sstatus;
5321   sbs     = gen_to->bs;
5322   rstarts = gen_from->starts;
5323   rprocs  = gen_from->procs;
5324   rbs     = gen_from->bs;
5325 
5326   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
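  /* The exchange proceeds in three phases: first the row lengths (i-array), then the column
     indices (j-array), and finally the numerical values (a-array). Only the a-array phase is
     repeated for MAT_REUSE_MATRIX, using the offsets saved in startsj_s/startsj_r and the
     send buffer saved in bufa_ptr. */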
5327   if (scall == MAT_INITIAL_MATRIX) {
5328     /* i-array */
5329     /*---------*/
5330     /*  post receives */
5331     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5332     for (i=0; i<nrecvs; i++) {
5333       rowlen = rvalues + rstarts[i]*rbs;
5334       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5335       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5336     }
5337 
5338     /* pack the outgoing message */
5339     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5340 
5341     sstartsj[0] = 0;
5342     rstartsj[0] = 0;
5343     len         = 0; /* total length of j or a array to be sent */
5344     k           = 0;
5345     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5346     for (i=0; i<nsends; i++) {
5347       rowlen = svalues + sstarts[i]*sbs;
5348       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5349       for (j=0; j<nrows; j++) {
5350         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5351         for (l=0; l<sbs; l++) {
5352           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5353 
5354           rowlen[j*sbs+l] = ncols;
5355 
5356           len += ncols;
5357           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5358         }
5359         k++;
5360       }
5361       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5362 
5363       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5364     }
5365     /* recvs and sends of i-array are completed */
5366     i = nrecvs;
5367     while (i--) {
5368       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5369     }
5370     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5371     ierr = PetscFree(svalues);CHKERRQ(ierr);
5372 
5373     /* allocate buffers for sending j and a arrays */
5374     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5375     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5376 
5377     /* create i-array of B_oth */
5378     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5379 
5380     b_othi[0] = 0;
5381     len       = 0; /* total length of j or a array to be received */
5382     k         = 0;
5383     for (i=0; i<nrecvs; i++) {
5384       rowlen = rvalues + rstarts[i]*rbs;
5385       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5386       for (j=0; j<nrows; j++) {
5387         b_othi[k+1] = b_othi[k] + rowlen[j];
5388         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5389         k++;
5390       }
5391       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5392     }
5393     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5394 
5395     /* allocate space for j and a arrays of B_oth */
5396     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5397     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5398 
5399     /* j-array */
5400     /*---------*/
5401     /*  post receives of j-array */
5402     for (i=0; i<nrecvs; i++) {
5403       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5404       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5405     }
5406 
5407     /* pack the outgoing message j-array */
5408     k = 0;
5409     for (i=0; i<nsends; i++) {
5410       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5411       bufJ  = bufj+sstartsj[i];
5412       for (j=0; j<nrows; j++) {
5413         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5414         for (ll=0; ll<sbs; ll++) {
5415           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5416           for (l=0; l<ncols; l++) {
5417             *bufJ++ = cols[l];
5418           }
5419           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5420         }
5421       }
5422       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5423     }
5424 
5425     /* recvs and sends of j-array are completed */
5426     i = nrecvs;
5427     while (i--) {
5428       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5429     }
5430     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5431   } else if (scall == MAT_REUSE_MATRIX) {
5432     sstartsj = *startsj_s;
5433     rstartsj = *startsj_r;
5434     bufa     = *bufa_ptr;
5435     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5436     b_otha   = b_oth->a;
5437   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5438 
5439   /* a-array */
5440   /*---------*/
5441   /*  post receives of a-array */
5442   for (i=0; i<nrecvs; i++) {
5443     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5444     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5445   }
5446 
5447   /* pack the outgoing message a-array */
5448   k = 0;
5449   for (i=0; i<nsends; i++) {
5450     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5451     bufA  = bufa+sstartsj[i];
5452     for (j=0; j<nrows; j++) {
5453       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5454       for (ll=0; ll<sbs; ll++) {
5455         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5456         for (l=0; l<ncols; l++) {
5457           *bufA++ = vals[l];
5458         }
5459         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5460       }
5461     }
5462     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5463   }
5464   /* recvs and sends of a-array are completed */
5465   i = nrecvs;
5466   while (i--) {
5467     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5468   }
5469   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5470   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5471 
5472   if (scall == MAT_INITIAL_MATRIX) {
5473     /* put together the new matrix */
5474     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5475 
5476     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5477     /* Since these are PETSc arrays, change flags to free them as necessary. */
5478     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5479     b_oth->free_a  = PETSC_TRUE;
5480     b_oth->free_ij = PETSC_TRUE;
5481     b_oth->nonew   = 0;
5482 
5483     ierr = PetscFree(bufj);CHKERRQ(ierr);
5484     if (!startsj_s || !bufa_ptr) {
5485       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5486       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5487     } else {
5488       *startsj_s = sstartsj;
5489       *startsj_r = rstartsj;
5490       *bufa_ptr  = bufa;
5491     }
5492   }
5493   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5494   PetscFunctionReturn(0);
5495 }
5496 
5497 /*@C
5498   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5499 
5500   Not Collective
5501 
5502   Input Parameters:
5503 . A - The matrix in mpiaij format
5504 
5505   Output Parameters:
5506 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5507 . colmap - A map from global column index to local index into lvec
5508 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5509 
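  Example usage (a minimal sketch; A is assumed to be an assembled MATMPIAIJ matrix; the type of
  colmap depends on whether PETSc was configured with PETSC_USE_CTABLE, and the returned objects
  are internal to A and must not be destroyed by the caller):
.vb
    Vec        lvec;
    VecScatter scatter;
  #if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
  #else
    PetscInt   *colmap;
  #endif
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scatter);CHKERRQ(ierr);
.ve
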
5510   Level: developer
5511 
5512 @*/
5513 #if defined(PETSC_USE_CTABLE)
5514 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5515 #else
5516 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5517 #endif
5518 {
5519   Mat_MPIAIJ *a;
5520 
5521   PetscFunctionBegin;
5522   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5523   PetscValidPointer(lvec, 2);
5524   PetscValidPointer(colmap, 3);
5525   PetscValidPointer(multScatter, 4);
5526   a = (Mat_MPIAIJ*) A->data;
5527   if (lvec) *lvec = a->lvec;
5528   if (colmap) *colmap = a->colmap;
5529   if (multScatter) *multScatter = a->Mvctx;
5530   PetscFunctionReturn(0);
5531 }
5532 
5533 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5534 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5535 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5536 #if defined(PETSC_HAVE_MKL_SPARSE)
5537 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5538 #endif
5539 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5540 #if defined(PETSC_HAVE_ELEMENTAL)
5541 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5542 #endif
5543 #if defined(PETSC_HAVE_HYPRE)
5544 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5545 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5546 #endif
5547 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5548 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5549 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5550 
5551 /*
5552     Computes (B'*A')' since computing A*B directly is untenable
5553 
5554                n                       p                          p
5555         (              )       (              )         (                  )
5556       m (      A       )  *  n (       B      )   =   m (         C        )
5557         (              )       (              )         (                  )
5558 
5559 */
5560 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5561 {
5562   PetscErrorCode ierr;
5563   Mat            At,Bt,Ct;
5564 
5565   PetscFunctionBegin;
5566   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5567   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5568   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5569   ierr = MatDestroy(&At);CHKERRQ(ierr);
5570   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5571   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5572   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5573   PetscFunctionReturn(0);
5574 }
5575 
5576 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5577 {
5578   PetscErrorCode ierr;
5579   PetscInt       m=A->rmap->n,n=B->cmap->n;
5580   Mat            Cmat;
5581 
5582   PetscFunctionBegin;
5583   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5584   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5585   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5586   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5587   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5588   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5589   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5590   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5591 
5592   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5593 
5594   *C = Cmat;
5595   PetscFunctionReturn(0);
5596 }
5597 
5598 /* ----------------------------------------------------------------*/
5599 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5600 {
5601   PetscErrorCode ierr;
5602 
5603   PetscFunctionBegin;
5604   if (scall == MAT_INITIAL_MATRIX) {
5605     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5606     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5607     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5608   }
5609   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5610   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5611   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5612   PetscFunctionReturn(0);
5613 }
5614 
5615 /*MC
5616    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5617 
5618    Options Database Keys:
5619 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5620 
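  Example usage (a minimal sketch of explicitly creating an MPIAIJ matrix; m, n, M, N and the
  preallocation values d_nz and o_nz are placeholders to be chosen by the caller):
.vb
    Mat A;
    ierr = MatCreate(comm,&A);CHKERRQ(ierr);
    ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
.ve
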
5621   Level: beginner
5622 
5623 .seealso: MatCreateAIJ()
5624 M*/
5625 
5626 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5627 {
5628   Mat_MPIAIJ     *b;
5629   PetscErrorCode ierr;
5630   PetscMPIInt    size;
5631 
5632   PetscFunctionBegin;
5633   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5634 
5635   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5636   B->data       = (void*)b;
5637   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5638   B->assembled  = PETSC_FALSE;
5639   B->insertmode = NOT_SET_VALUES;
5640   b->size       = size;
5641 
5642   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5643 
5644   /* build cache for off array entries formed */
5645   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5646 
5647   b->donotstash  = PETSC_FALSE;
5648   b->colmap      = 0;
5649   b->garray      = 0;
5650   b->roworiented = PETSC_TRUE;
5651 
5652   /* stuff used for matrix vector multiply */
5653   b->lvec  = NULL;
5654   b->Mvctx = NULL;
5655 
5656   /* stuff for MatGetRow() */
5657   b->rowindices   = 0;
5658   b->rowvalues    = 0;
5659   b->getrowactive = PETSC_FALSE;
5660 
5661   /* flexible pointer used in CUSP/CUSPARSE classes */
5662   b->spptr = NULL;
5663 
5664   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5665   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5666   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5667   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5668   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5669   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5670   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5671   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5672   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5673   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5674 #if defined(PETSC_HAVE_MKL_SPARSE)
5675   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5676 #endif
5677   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5678   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5679 #if defined(PETSC_HAVE_ELEMENTAL)
5680   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5681 #endif
5682 #if defined(PETSC_HAVE_HYPRE)
5683   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5684 #endif
5685   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5686   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5687   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5688   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5689   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5690 #if defined(PETSC_HAVE_HYPRE)
5691   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5692 #endif
5693   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5694   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5695   PetscFunctionReturn(0);
5696 }
5697 
5698 /*@C
5699      MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
5700          and "off-diagonal" parts of the matrix in CSR format.
5701 
5702    Collective on MPI_Comm
5703 
5704    Input Parameters:
5705 +  comm - MPI communicator
5706 .  m - number of local rows (Cannot be PETSC_DECIDE)
5707 .  n - This value should be the same as the local size used in creating the
5708        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5709        calculated if N is given). For square matrices n is almost always m.
5710 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5711 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5712 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
5713 .   j - column indices
5714 .   a - matrix values
5715 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
5716 .   oj - column indices
5717 -   oa - matrix values
5718 
5719    Output Parameter:
5720 .   mat - the matrix
5721 
5722    Level: advanced
5723 
5724    Notes:
5725        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5726        must free the arrays once the matrix has been destroyed and not before.
5727 
5728        The i and j indices are 0 based
5729 
5730        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5731 
5732        This sets local rows and cannot be used to set off-processor values.
5733 
5734        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5735        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5736        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof), as sketched below, because
5737        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5738        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5739        communication if it is known that only local entries will be set.
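
       For reference, a minimal sketch of the recommended MatSetValues() assembly path mentioned
       above (the loop body and the computation of ncols, cols and vals are placeholders):
.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row = rstart; row < rend; row++) {
       ... compute ncols, cols[], vals[] for this row ...
       ierr = MatSetValues(A,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve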
5740 
5741 .keywords: matrix, aij, compressed row, sparse, parallel
5742 
5743 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5744           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5745 @*/
5746 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5747 {
5748   PetscErrorCode ierr;
5749   Mat_MPIAIJ     *maij;
5750 
5751   PetscFunctionBegin;
5752   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5753   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5754   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5755   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5756   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5757   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5758   maij = (Mat_MPIAIJ*) (*mat)->data;
5759 
5760   (*mat)->preallocated = PETSC_TRUE;
5761 
5762   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5763   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5764 
5765   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5766   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5767 
5768   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5769   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5770   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5771   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5772 
5773   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5774   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5775   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5776   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5777   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5778   PetscFunctionReturn(0);
5779 }
5780 
5781 /*
5782     Special version for direct calls from Fortran
5783 */
5784 #include <petsc/private/fortranimpl.h>
5785 
5786 /* Change these macros so can be used in void function */
5787 #undef CHKERRQ
5788 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5789 #undef SETERRQ2
5790 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5791 #undef SETERRQ3
5792 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5793 #undef SETERRQ
5794 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5795 
5796 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5797 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5798 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5799 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5800 #else
5801 #endif
5802 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5803 {
5804   Mat            mat  = *mmat;
5805   PetscInt       m    = *mm, n = *mn;
5806   InsertMode     addv = *maddv;
5807   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5808   PetscScalar    value;
5809   PetscErrorCode ierr;
5810 
5811   MatCheckPreallocated(mat,1);
5812   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5813 
5814 #if defined(PETSC_USE_DEBUG)
5815   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5816 #endif
5817   {
5818     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5819     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5820     PetscBool roworiented = aij->roworiented;
5821 
5822     /* Some Variables required in the macro */
5823     Mat        A                 = aij->A;
5824     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5825     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5826     MatScalar  *aa               = a->a;
5827     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5828     Mat        B                 = aij->B;
5829     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5830     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5831     MatScalar  *ba               = b->a;
5832 
5833     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5834     PetscInt  nonew = a->nonew;
5835     MatScalar *ap1,*ap2;
5836 
5837     PetscFunctionBegin;
5838     for (i=0; i<m; i++) {
5839       if (im[i] < 0) continue;
5840 #if defined(PETSC_USE_DEBUG)
5841       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5842 #endif
5843       if (im[i] >= rstart && im[i] < rend) {
5844         row      = im[i] - rstart;
5845         lastcol1 = -1;
5846         rp1      = aj + ai[row];
5847         ap1      = aa + ai[row];
5848         rmax1    = aimax[row];
5849         nrow1    = ailen[row];
5850         low1     = 0;
5851         high1    = nrow1;
5852         lastcol2 = -1;
5853         rp2      = bj + bi[row];
5854         ap2      = ba + bi[row];
5855         rmax2    = bimax[row];
5856         nrow2    = bilen[row];
5857         low2     = 0;
5858         high2    = nrow2;
5859 
5860         for (j=0; j<n; j++) {
5861           if (roworiented) value = v[i*n+j];
5862           else value = v[i+j*m];
5863           if (in[j] >= cstart && in[j] < cend) {
5864             col = in[j] - cstart;
5865             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5866             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5867           } else if (in[j] < 0) continue;
5868 #if defined(PETSC_USE_DEBUG)
5869           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5870           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5871 #endif
5872           else {
5873             if (mat->was_assembled) {
5874               if (!aij->colmap) {
5875                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5876               }
5877 #if defined(PETSC_USE_CTABLE)
5878               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5879               col--;
5880 #else
5881               col = aij->colmap[in[j]] - 1;
5882 #endif
5883               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5884               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5885                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5886                 col  =  in[j];
5887                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5888                 B     = aij->B;
5889                 b     = (Mat_SeqAIJ*)B->data;
5890                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5891                 rp2   = bj + bi[row];
5892                 ap2   = ba + bi[row];
5893                 rmax2 = bimax[row];
5894                 nrow2 = bilen[row];
5895                 low2  = 0;
5896                 high2 = nrow2;
5897                 bm    = aij->B->rmap->n;
5898                 ba    = b->a;
5899               }
5900             } else col = in[j];
5901             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5902           }
5903         }
5904       } else if (!aij->donotstash) {
5905         if (roworiented) {
5906           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5907         } else {
5908           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5909         }
5910       }
5911     }
5912   }
5913   PetscFunctionReturnVoid();
5914 }
5915