xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision fc6138e5bb0ddea085ccbafdbba92de7843135df)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise.  As a result, for single process communicators,
13   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
14   for communicators controlling multiple processes.  It is recommended that you call both of
15   the above preallocation routines for simplicity.
16 
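   For example, the following sketch (not taken from an existing example; error checking is omitted,
   and N and nz stand for an assumed global size and an estimated number of nonzeros per row)
   preallocates for both the sequential and the parallel case:
.vb
     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,nz,NULL);
     MatMPIAIJSetPreallocation(A,nz,NULL,nz,NULL);
.ve
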
17    Options Database Keys:
18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19 
20   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The type also automatically
21    switches over to use inodes when enough of them exist.
22 
23   Level: beginner
24 
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
26 M*/
27 
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30 
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34   for communicators controlling multiple processes.  It is recommended that you call both of
35   the above preallocation routines for simplicity.
36 
37    Options Database Keys:
38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39 
40   Level: beginner
41 
42 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
48 {
49   PetscErrorCode  ierr;
50   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
51   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
52   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
53   const PetscInt  *ia,*ib;
54   const MatScalar *aa,*bb;
55   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
56   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
57 
58   PetscFunctionBegin;
59   *keptrows = 0;
60   ia        = a->i;
61   ib        = b->i;
62   for (i=0; i<m; i++) {
63     na = ia[i+1] - ia[i];
64     nb = ib[i+1] - ib[i];
65     if (!na && !nb) {
66       cnt++;
67       goto ok1;
68     }
69     aa = a->a + ia[i];
70     for (j=0; j<na; j++) {
71       if (aa[j] != 0.0) goto ok1;
72     }
73     bb = b->a + ib[i];
74     for (j=0; j <nb; j++) {
75       if (bb[j] != 0.0) goto ok1;
76     }
77     cnt++;
78 ok1:;
79   }
80   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
81   if (!n0rows) PetscFunctionReturn(0);
82   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
83   cnt  = 0;
84   for (i=0; i<m; i++) {
85     na = ia[i+1] - ia[i];
86     nb = ib[i+1] - ib[i];
87     if (!na && !nb) continue;
88     aa = a->a + ia[i];
89     for (j=0; j<na;j++) {
90       if (aa[j] != 0.0) {
91         rows[cnt++] = rstart + i;
92         goto ok2;
93       }
94     }
95     bb = b->a + ib[i];
96     for (j=0; j<nb; j++) {
97       if (bb[j] != 0.0) {
98         rows[cnt++] = rstart + i;
99         goto ok2;
100       }
101     }
102 ok2:;
103   }
104   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
110 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
111 {
112   PetscErrorCode    ierr;
113   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
114 
115   PetscFunctionBegin;
116   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
117     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
118   } else {
119     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
128 {
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
130   PetscErrorCode ierr;
131   PetscInt       i,rstart,nrows,*rows;
132 
133   PetscFunctionBegin;
134   *zrows = NULL;
135   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
136   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
137   for (i=0; i<nrows; i++) rows[i] += rstart;
138   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
145 {
146   PetscErrorCode ierr;
147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
148   PetscInt       i,n,*garray = aij->garray;
149   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
150   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
151   PetscReal      *work;
152 
153   PetscFunctionBegin;
154   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
155   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
156   if (type == NORM_2) {
157     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
158       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
159     }
160     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
161       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
162     }
163   } else if (type == NORM_1) {
164     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
165       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
166     }
167     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
168       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
169     }
170   } else if (type == NORM_INFINITY) {
171     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
172       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
173     }
174     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
175       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
176     }
177 
178   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
179   if (type == NORM_INFINITY) {
180     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
181   } else {
182     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
183   }
184   ierr = PetscFree(work);CHKERRQ(ierr);
185   if (type == NORM_2) {
186     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
194 {
195   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
196   IS              sis,gis;
197   PetscErrorCode  ierr;
198   const PetscInt  *isis,*igis;
199   PetscInt        n,*iis,nsis,ngis,rstart,i;
200 
201   PetscFunctionBegin;
202   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
203   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
204   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
205   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
206   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
207   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
208 
209   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
210   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
211   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
212   n    = ngis + nsis;
213   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
214   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
215   for (i=0; i<n; i++) iis[i] += rstart;
216   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
217 
218   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
219   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
220   ierr = ISDestroy(&sis);CHKERRQ(ierr);
221   ierr = ISDestroy(&gis);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
225 #undef __FUNCT__
226 #define __FUNCT__ "MatDistribute_MPIAIJ"
227 /*
228     Distributes a SeqAIJ matrix, held on process 0, across a set of processes. Code stolen from
229     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
230 
231     Only for square matrices
232 
233     Used by a preconditioner, hence PETSC_EXTERN
234 */
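/*
   Usage sketch (not copied from an existing caller; it assumes gmat is a MATSEQAIJ matrix that is
   meaningful only on process 0 of comm, and m is the number of rows this process should own):

     Mat dist;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     ... update the numerical values of gmat on process 0 ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/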
235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
236 {
237   PetscMPIInt    rank,size;
238   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
239   PetscErrorCode ierr;
240   Mat            mat;
241   Mat_SeqAIJ     *gmata;
242   PetscMPIInt    tag;
243   MPI_Status     status;
244   PetscBool      aij;
245   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
246 
247   PetscFunctionBegin;
248   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
249   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
250   if (!rank) {
251     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
252     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
253   }
254   if (reuse == MAT_INITIAL_MATRIX) {
255     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
256     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
257     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
258     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
259     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
260     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
261     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
262     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
263     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
264 
265     rowners[0] = 0;
266     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
267     rstart = rowners[rank];
268     rend   = rowners[rank+1];
269     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
270     if (!rank) {
271       gmata = (Mat_SeqAIJ*) gmat->data;
272       /* send row lengths to all processors */
273       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
274       for (i=1; i<size; i++) {
275         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
276       }
277       /* determine the number of diagonal and off-diagonal entries in each row */
278       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
279       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
280       jj   = 0;
281       for (i=0; i<m; i++) {
282         for (j=0; j<dlens[i]; j++) {
283           if (gmata->j[jj] < rstart) ld[i]++;
284           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
285           jj++;
286         }
287       }
288       /* send column indices to other processes */
289       for (i=1; i<size; i++) {
290         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
291         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
292         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293       }
294 
295       /* send numerical values to other processes */
296       for (i=1; i<size; i++) {
297         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
298         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
299       }
300       gmataa = gmata->a;
301       gmataj = gmata->j;
302 
303     } else {
304       /* receive row lengths */
305       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
306       /* receive column indices */
307       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
308       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
309       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* determine the number of diagonal and off-diagonal entries in each row */
311       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
312       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
313       jj   = 0;
314       for (i=0; i<m; i++) {
315         for (j=0; j<dlens[i]; j++) {
316           if (gmataj[jj] < rstart) ld[i]++;
317           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
318           jj++;
319         }
320       }
321       /* receive numerical values */
322       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* set preallocation */
326     for (i=0; i<m; i++) {
327       dlens[i] -= olens[i];
328     }
329     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
330     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
331 
332     for (i=0; i<m; i++) {
333       dlens[i] += olens[i];
334     }
335     cnt = 0;
336     for (i=0; i<m; i++) {
337       row  = rstart + i;
338       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
339       cnt += dlens[i];
340     }
341     if (rank) {
342       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
343     }
344     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
345     ierr = PetscFree(rowners);CHKERRQ(ierr);
346 
347     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
348 
349     *inmat = mat;
350   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
351     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
352     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
353     mat  = *inmat;
354     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
355     if (!rank) {
356       /* send numerical values to other processes */
357       gmata  = (Mat_SeqAIJ*) gmat->data;
358       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
359       gmataa = gmata->a;
360       for (i=1; i<size; i++) {
361         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
362         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
363       }
364       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
365     } else {
366       /* receive numerical values from process 0*/
367       nz   = Ad->nz + Ao->nz;
368       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
369       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
370     }
371     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
372     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
373     ad = Ad->a;
374     ao = Ao->a;
375     if (mat->rmap->n) {
376       i  = 0;
377       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
378       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
379     }
380     for (i=1; i<mat->rmap->n; i++) {
381       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     i--;
385     if (mat->rmap->n) {
386       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
387     }
388     if (rank) {
389       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
390     }
391   }
392   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
393   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 /*
398   Local utility routine that creates a mapping from the global column
399 number to the local number in the off-diagonal part of the local
400 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
401 a slightly higher hash table cost; without it, it is not scalable (each process
402 has an order N integer array) but is fast to access.
403 */
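/*
   Lookup sketch (mirroring how MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below use the map):
   given a global column index gcol, the local column index col into the off-diagonal block B is

#if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
     col--;
#else
     col = aij->colmap[gcol] - 1;
#endif

   and a result of -1 means gcol does not occur in the off-diagonal part on this process.
*/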
404 #undef __FUNCT__
405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
407 {
408   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscInt       n = aij->B->cmap->n,i;
411 
412   PetscFunctionBegin;
413   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
414 #if defined(PETSC_USE_CTABLE)
415   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
416   for (i=0; i<n; i++) {
417     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
418   }
419 #else
420   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
421   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
422   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
423 #endif
424   PetscFunctionReturn(0);
425 }
426 
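/*
   The two macros below insert or add a single (row,col,value) entry into the diagonal block A
   (first macro) and the off-diagonal block B (second macro): a short binary search narrows the
   column range within the row, a linear scan locates the column, and when the column is not yet
   present a new nonzero is created, reallocating and shifting the later entries of the row.
*/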
427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
428 { \
429     if (col <= lastcol1)  low1 = 0;     \
430     else                 high1 = nrow1; \
431     lastcol1 = col;\
432     while (high1-low1 > 5) { \
433       t = (low1+high1)/2; \
434       if (rp1[t] > col) high1 = t; \
435       else              low1  = t; \
436     } \
437       for (_i=low1; _i<high1; _i++) { \
438         if (rp1[_i] > col) break; \
439         if (rp1[_i] == col) { \
440           if (addv == ADD_VALUES) ap1[_i] += value;   \
441           else                    ap1[_i] = value; \
442           goto a_noinsert; \
443         } \
444       }  \
445       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
446       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
447       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
448       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
449       N = nrow1++ - 1; a->nz++; high1++; \
450       /* shift up all the later entries in this row */ \
451       for (ii=N; ii>=_i; ii--) { \
452         rp1[ii+1] = rp1[ii]; \
453         ap1[ii+1] = ap1[ii]; \
454       } \
455       rp1[_i] = col;  \
456       ap1[_i] = value;  \
457       A->nonzerostate++;\
458       a_noinsert: ; \
459       ailen[row] = nrow1; \
460 }
461 
462 
463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
464   { \
465     if (col <= lastcol2) low2 = 0;                        \
466     else high2 = nrow2;                                   \
467     lastcol2 = col;                                       \
468     while (high2-low2 > 5) {                              \
469       t = (low2+high2)/2;                                 \
470       if (rp2[t] > col) high2 = t;                        \
471       else             low2  = t;                         \
472     }                                                     \
473     for (_i=low2; _i<high2; _i++) {                       \
474       if (rp2[_i] > col) break;                           \
475       if (rp2[_i] == col) {                               \
476         if (addv == ADD_VALUES) ap2[_i] += value;         \
477         else                    ap2[_i] = value;          \
478         goto b_noinsert;                                  \
479       }                                                   \
480     }                                                     \
481     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
482     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
483     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
484     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
485     N = nrow2++ - 1; b->nz++; high2++;                    \
486     /* shift up all the later entries in this row */      \
487     for (ii=N; ii>=_i; ii--) {                            \
488       rp2[ii+1] = rp2[ii];                                \
489       ap2[ii+1] = ap2[ii];                                \
490     }                                                     \
491     rp2[_i] = col;                                        \
492     ap2[_i] = value;                                      \
493     B->nonzerostate++;                                    \
494     b_noinsert: ;                                         \
495     bilen[row] = nrow2;                                   \
496   }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
501 {
502   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
503   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
504   PetscErrorCode ierr;
505   PetscInt       l,*garray = mat->garray,diag;
506 
507   PetscFunctionBegin;
508   /* code only works for square matrices A */
509 
510   /* find size of row to the left of the diagonal part */
511   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
512   row  = row - diag;
513   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
514     if (garray[b->j[b->i[row]+l]] > diag) break;
515   }
516   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* diagonal part */
519   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
520 
521   /* right of diagonal part */
522   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatSetValues_MPIAIJ"
528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
529 {
530   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
531   PetscScalar    value;
532   PetscErrorCode ierr;
533   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
534   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
535   PetscBool      roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat        A                 = aij->A;
539   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
540   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
541   MatScalar  *aa               = a->a;
542   PetscBool  ignorezeroentries = a->ignorezeroentries;
543   Mat        B                 = aij->B;
544   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
545   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
546   MatScalar  *ba               = b->a;
547 
548   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
549   PetscInt  nonew;
550   MatScalar *ap1,*ap2;
551 
552   PetscFunctionBegin;
553   for (i=0; i<m; i++) {
554     if (im[i] < 0) continue;
555 #if defined(PETSC_USE_DEBUG)
556     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
557 #endif
558     if (im[i] >= rstart && im[i] < rend) {
559       row      = im[i] - rstart;
560       lastcol1 = -1;
561       rp1      = aj + ai[row];
562       ap1      = aa + ai[row];
563       rmax1    = aimax[row];
564       nrow1    = ailen[row];
565       low1     = 0;
566       high1    = nrow1;
567       lastcol2 = -1;
568       rp2      = bj + bi[row];
569       ap2      = ba + bi[row];
570       rmax2    = bimax[row];
571       nrow2    = bilen[row];
572       low2     = 0;
573       high2    = nrow2;
574 
575       for (j=0; j<n; j++) {
576         if (roworiented) value = v[i*n+j];
577         else             value = v[i+j*m];
578         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
579         if (in[j] >= cstart && in[j] < cend) {
580           col   = in[j] - cstart;
581           nonew = a->nonew;
582           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
583         } else if (in[j] < 0) continue;
584 #if defined(PETSC_USE_DEBUG)
585         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
586 #endif
587         else {
588           if (mat->was_assembled) {
589             if (!aij->colmap) {
590               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
591             }
592 #if defined(PETSC_USE_CTABLE)
593             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
594             col--;
595 #else
596             col = aij->colmap[in[j]] - 1;
597 #endif
598             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
599               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
600               col  =  in[j];
601               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
602               B     = aij->B;
603               b     = (Mat_SeqAIJ*)B->data;
604               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
605               rp2   = bj + bi[row];
606               ap2   = ba + bi[row];
607               rmax2 = bimax[row];
608               nrow2 = bilen[row];
609               low2  = 0;
610               high2 = nrow2;
611               bm    = aij->B->rmap->n;
612               ba    = b->a;
613             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614           } else col = in[j];
615           nonew = b->nonew;
616           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
617         }
618       }
619     } else {
620       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
621       if (!aij->donotstash) {
622         mat->assembled = PETSC_FALSE;
623         if (roworiented) {
624           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
625         } else {
626           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
627         }
628       }
629     }
630   }
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatGetValues_MPIAIJ"
636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
641   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
642 
643   PetscFunctionBegin;
644   for (i=0; i<m; i++) {
645     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
646     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
647     if (idxm[i] >= rstart && idxm[i] < rend) {
648       row = idxm[i] - rstart;
649       for (j=0; j<n; j++) {
650         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
651         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
652         if (idxn[j] >= cstart && idxn[j] < cend) {
653           col  = idxn[j] - cstart;
654           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
655         } else {
656           if (!aij->colmap) {
657             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
658           }
659 #if defined(PETSC_USE_CTABLE)
660           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
661           col--;
662 #else
663           col = aij->colmap[idxn[j]] - 1;
664 #endif
665           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
666           else {
667             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
668           }
669         }
670       }
671     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
681 {
682   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
683   PetscErrorCode ierr;
684   PetscInt       nstash,reallocs;
685 
686   PetscFunctionBegin;
687   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
688 
689   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
690   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
691   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
694 
695 #undef __FUNCT__
696 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
697 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
698 {
699   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
700   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
701   PetscErrorCode ierr;
702   PetscMPIInt    n;
703   PetscInt       i,j,rstart,ncols,flg;
704   PetscInt       *row,*col;
705   PetscBool      other_disassembled;
706   PetscScalar    *val;
707 
708   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
709 
710   PetscFunctionBegin;
711   if (!aij->donotstash && !mat->nooffprocentries) {
712     while (1) {
713       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
714       if (!flg) break;
715 
716       for (i=0; i<n; ) {
717         /* Now identify the consecutive vals belonging to the same row */
718         for (j=i,rstart=row[j]; j<n; j++) {
719           if (row[j] != rstart) break;
720         }
721         if (j < n) ncols = j-i;
722         else       ncols = n-i;
723         /* Now assemble all these values with a single function call */
724         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
725 
726         i = j;
727       }
728     }
729     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
730   }
731   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
732   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
733 
734   /* determine if any processor has disassembled; if so, we must
735      also disassemble ourselves so that we may reassemble. */
736   /*
737      if nonzero structure of submatrix B cannot change then we know that
738      no processor disassembled thus we can skip this stuff
739   */
740   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
741     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
742     if (mat->was_assembled && !other_disassembled) {
743       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
744     }
745   }
746   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
747     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
748   }
749   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
750   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
751   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
752 
753   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
754 
755   aij->rowvalues = 0;
756 
757   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
758   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
759 
760   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
761   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
762     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
763     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
764   }
765   PetscFunctionReturn(0);
766 }
767 
768 #undef __FUNCT__
769 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
771 {
772   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
773   PetscErrorCode ierr;
774 
775   PetscFunctionBegin;
776   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
777   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 
781 #undef __FUNCT__
782 #define __FUNCT__ "MatZeroRows_MPIAIJ"
783 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
784 {
785   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
786   PetscInt      *lrows;
787   PetscInt       r, len;
788   PetscErrorCode ierr;
789 
790   PetscFunctionBegin;
791   /* get locally owned rows */
792   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
793   /* fix right hand side if needed */
794   if (x && b) {
795     const PetscScalar *xx;
796     PetscScalar       *bb;
797 
798     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
799     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
800     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
801     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
802     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
803   }
804   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
805   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
806   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
807     PetscBool cong;
808     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
809     if (cong) A->congruentlayouts = 1;
810     else      A->congruentlayouts = 0;
811   }
812   if ((diag != 0.0) && A->congruentlayouts) {
813     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
814   } else if (diag != 0.0) {
815     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
816     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
817     for (r = 0; r < len; ++r) {
818       const PetscInt row = lrows[r] + A->rmap->rstart;
819       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
820     }
821     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
822     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
823   } else {
824     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
825   }
826   ierr = PetscFree(lrows);CHKERRQ(ierr);
827 
828   /* only change matrix nonzero state if pattern was allowed to be changed */
829   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
830     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
831     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
832   }
833   PetscFunctionReturn(0);
834 }
835 
836 #undef __FUNCT__
837 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
839 {
840   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
841   PetscErrorCode    ierr;
842   PetscMPIInt       n = A->rmap->n;
843   PetscInt          i,j,r,m,p = 0,len = 0;
844   PetscInt          *lrows,*owners = A->rmap->range;
845   PetscSFNode       *rrows;
846   PetscSF           sf;
847   const PetscScalar *xx;
848   PetscScalar       *bb,*mask;
849   Vec               xmask,lmask;
850   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
851   const PetscInt    *aj, *ii,*ridx;
852   PetscScalar       *aa;
853 
854   PetscFunctionBegin;
855   /* Create SF where leaves are input rows and roots are owned rows */
856   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
857   for (r = 0; r < n; ++r) lrows[r] = -1;
858   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
859   for (r = 0; r < N; ++r) {
860     const PetscInt idx   = rows[r];
861     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
862     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
863       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
864     }
865     rrows[r].rank  = p;
866     rrows[r].index = rows[r] - owners[p];
867   }
868   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
869   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
870   /* Collect flags for rows to be zeroed */
871   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
872   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
873   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
874   /* Compress and put in row numbers */
875   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
876   /* zero diagonal part of matrix */
877   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
878   /* handle off diagonal part of matrix */
879   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
880   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
881   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
882   for (i=0; i<len; i++) bb[lrows[i]] = 1;
883   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
884   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
885   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
887   if (x) {
888     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
889     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
890     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
891     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
892   }
893   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
894   /* remove zeroed rows of off diagonal matrix */
895   ii = aij->i;
896   for (i=0; i<len; i++) {
897     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
898   }
899   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
900   if (aij->compressedrow.use) {
901     m    = aij->compressedrow.nrows;
902     ii   = aij->compressedrow.i;
903     ridx = aij->compressedrow.rindex;
904     for (i=0; i<m; i++) {
905       n  = ii[i+1] - ii[i];
906       aj = aij->j + ii[i];
907       aa = aij->a + ii[i];
908 
909       for (j=0; j<n; j++) {
910         if (PetscAbsScalar(mask[*aj])) {
911           if (b) bb[*ridx] -= *aa*xx[*aj];
912           *aa = 0.0;
913         }
914         aa++;
915         aj++;
916       }
917       ridx++;
918     }
919   } else { /* do not use compressed row format */
920     m = l->B->rmap->n;
921     for (i=0; i<m; i++) {
922       n  = ii[i+1] - ii[i];
923       aj = aij->j + ii[i];
924       aa = aij->a + ii[i];
925       for (j=0; j<n; j++) {
926         if (PetscAbsScalar(mask[*aj])) {
927           if (b) bb[i] -= *aa*xx[*aj];
928           *aa = 0.0;
929         }
930         aa++;
931         aj++;
932       }
933     }
934   }
935   if (x) {
936     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
937     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
938   }
939   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
940   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
941   ierr = PetscFree(lrows);CHKERRQ(ierr);
942 
943   /* only change matrix nonzero state if pattern was allowed to be changed */
944   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
945     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
946     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
947   }
948   PetscFunctionReturn(0);
949 }
950 
951 #undef __FUNCT__
952 #define __FUNCT__ "MatMult_MPIAIJ"
953 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
954 {
955   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
956   PetscErrorCode ierr;
957   PetscInt       nt;
958 
959   PetscFunctionBegin;
960   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
961   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
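  /* overlap the scatter of the needed off-process entries of xx (into a->lvec) with the product
     of the purely local diagonal block, then add in the off-diagonal contribution */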
962   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
963   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
964   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
965   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
966   PetscFunctionReturn(0);
967 }
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
971 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
972 {
973   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
974   PetscErrorCode ierr;
975 
976   PetscFunctionBegin;
977   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatMultAdd_MPIAIJ"
983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
984 {
985   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
986   PetscErrorCode ierr;
987 
988   PetscFunctionBegin;
989   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
990   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
991   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
993   PetscFunctionReturn(0);
994 }
995 
996 #undef __FUNCT__
997 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
998 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
999 {
1000   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1001   PetscErrorCode ierr;
1002   PetscBool      merged;
1003 
1004   PetscFunctionBegin;
1005   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1006   /* do nondiagonal part */
1007   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1008   if (!merged) {
1009     /* send it on its way */
1010     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1011     /* do local part */
1012     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1013     /* receive remote parts: note this assumes the values are not */
1014     /* actually added into yy until the next line */
1015     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1016   } else {
1017     /* do local part */
1018     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1019     /* send it on its way */
1020     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1021     /* values actually were received in the Begin() but we need to call this nop */
1022     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1023   }
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1029 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1030 {
1031   MPI_Comm       comm;
1032   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1033   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1034   IS             Me,Notme;
1035   PetscErrorCode ierr;
1036   PetscInt       M,N,first,last,*notme,i;
1037   PetscMPIInt    size;
1038 
1039   PetscFunctionBegin;
1040   /* Easy test: symmetric diagonal block */
1041   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1042   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1043   if (!*f) PetscFunctionReturn(0);
1044   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1045   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1046   if (size == 1) PetscFunctionReturn(0);
1047 
1048   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1049   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1050   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1051   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1052   for (i=0; i<first; i++) notme[i] = i;
1053   for (i=last; i<M; i++) notme[i-last+first] = i;
1054   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1055   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1056   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1057   Aoff = Aoffs[0];
1058   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1059   Boff = Boffs[0];
1060   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1061   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1062   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1063   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1064   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1065   ierr = PetscFree(notme);CHKERRQ(ierr);
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1071 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1072 {
1073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1074   PetscErrorCode ierr;
1075 
1076   PetscFunctionBegin;
1077   /* do nondiagonal part */
1078   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1079   /* send it on its way */
1080   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1081   /* do local part */
1082   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1083   /* receive remote parts */
1084   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 /*
1089   This only works correctly for square matrices where the subblock A->A is the
1090    diagonal block
1091 */
1092 #undef __FUNCT__
1093 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1094 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1095 {
1096   PetscErrorCode ierr;
1097   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1098 
1099   PetscFunctionBegin;
1100   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1101   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1102   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatScale_MPIAIJ"
1108 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1109 {
1110   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1111   PetscErrorCode ierr;
1112 
1113   PetscFunctionBegin;
1114   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1115   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatDestroy_MPIAIJ"
1121 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1122 {
1123   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1124   PetscErrorCode ierr;
1125 
1126   PetscFunctionBegin;
1127 #if defined(PETSC_USE_LOG)
1128   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1129 #endif
1130   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1131   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1132   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1133   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1134 #if defined(PETSC_USE_CTABLE)
1135   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1136 #else
1137   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1138 #endif
1139   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1140   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1141   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1142   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1143   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1144   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1145 
1146   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1147   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1148   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1149   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1150   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1151   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1152   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1153   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1154   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1155 #if defined(PETSC_HAVE_ELEMENTAL)
1156   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1157 #endif
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 #undef __FUNCT__
1162 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1163 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1164 {
1165   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1166   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1167   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1168   PetscErrorCode ierr;
1169   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1170   int            fd;
1171   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1172   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1173   PetscScalar    *column_values;
1174   PetscInt       message_count,flowcontrolcount;
1175   FILE           *file;
1176 
1177   PetscFunctionBegin;
1178   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1179   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1180   nz   = A->nz + B->nz;
1181   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1182   if (!rank) {
1183     header[0] = MAT_FILE_CLASSID;
1184     header[1] = mat->rmap->N;
1185     header[2] = mat->cmap->N;
1186 
1187     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1188     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1189     /* get largest number of rows any processor has */
1190     rlen  = mat->rmap->n;
1191     range = mat->rmap->range;
1192     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1193   } else {
1194     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1195     rlen = mat->rmap->n;
1196   }
1197 
1198   /* load up the local row counts */
1199   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1200   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1201 
1202   /* store the row lengths to the file */
1203   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1204   if (!rank) {
1205     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1206     for (i=1; i<size; i++) {
1207       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1208       rlen = range[i+1] - range[i];
1209       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1210       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1211     }
1212     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1213   } else {
1214     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1215     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1216     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1217   }
1218   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1219 
1220   /* load up the local column indices */
1221   nzmax = nz; /* process 0 needs as much space as the largest amount any process needs */
1222   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1223   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1224   cnt   = 0;
1225   for (i=0; i<mat->rmap->n; i++) {
1226     for (j=B->i[i]; j<B->i[i+1]; j++) {
1227       if ((col = garray[B->j[j]]) > cstart) break;
1228       column_indices[cnt++] = col;
1229     }
1230     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1231     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1232   }
1233   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1234 
1235   /* store the column indices to the file */
1236   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1237   if (!rank) {
1238     MPI_Status status;
1239     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1240     for (i=1; i<size; i++) {
1241       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1242       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1243       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1244       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1245       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1246     }
1247     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1248   } else {
1249     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1250     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1251     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1252     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1253   }
1254   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1255 
1256   /* load up the local column values */
1257   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1258   cnt  = 0;
1259   for (i=0; i<mat->rmap->n; i++) {
1260     for (j=B->i[i]; j<B->i[i+1]; j++) {
1261       if (garray[B->j[j]] > cstart) break;
1262       column_values[cnt++] = B->a[j];
1263     }
1264     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1265     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1266   }
1267   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1268 
1269   /* store the column values to the file */
1270   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1271   if (!rank) {
1272     MPI_Status status;
1273     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1274     for (i=1; i<size; i++) {
1275       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1276       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1277       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1278       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1279       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1280     }
1281     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1282   } else {
1283     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1284     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1285     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1286     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1287   }
1288   ierr = PetscFree(column_values);CHKERRQ(ierr);
1289 
1290   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1291   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1292   PetscFunctionReturn(0);
1293 }
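/*
   Usage sketch (not from the original source; assumes A is an assembled parallel AIJ matrix
   and B a matrix to be created; error checking omitted): the binary path above is normally
   reached through the generic MatView()/MatLoad() interface with a binary viewer.

     PetscViewer viewer;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);
     MatView(A,viewer);                    // in parallel this ends up in MatView_MPIAIJ_Binary()
     PetscViewerDestroy(&viewer);

     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetType(B,MATAIJ);
     MatLoad(B,viewer);
     PetscViewerDestroy(&viewer);
*/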
1294 
1295 #include <petscdraw.h>
1296 #undef __FUNCT__
1297 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1298 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1299 {
1300   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1301   PetscErrorCode    ierr;
1302   PetscMPIInt       rank = aij->rank,size = aij->size;
1303   PetscBool         isdraw,iascii,isbinary;
1304   PetscViewer       sviewer;
1305   PetscViewerFormat format;
1306 
1307   PetscFunctionBegin;
1308   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1309   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1310   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1311   if (iascii) {
1312     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1313     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1314       MatInfo   info;
1315       PetscBool inodes;
1316 
1317       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1318       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1319       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1320       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1321       if (!inodes) {
1322         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1323                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1324       } else {
1325         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1326                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1327       }
1328       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1329       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1330       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1331       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1332       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1333       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1334       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1335       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1336       PetscFunctionReturn(0);
1337     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1338       PetscInt inodecount,inodelimit,*inodes;
1339       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1340       if (inodes) {
1341         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1342       } else {
1343         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1344       }
1345       PetscFunctionReturn(0);
1346     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1347       PetscFunctionReturn(0);
1348     }
1349   } else if (isbinary) {
1350     if (size == 1) {
1351       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1352       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1353     } else {
1354       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1355     }
1356     PetscFunctionReturn(0);
1357   } else if (isdraw) {
1358     PetscDraw draw;
1359     PetscBool isnull;
1360     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1361     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1362     if (isnull) PetscFunctionReturn(0);
1363   }
1364 
1365   {
1366     /* assemble the entire matrix onto the first process */
1367     Mat        A;
1368     Mat_SeqAIJ *Aloc;
1369     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1370     MatScalar  *a;
1371 
1372     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1373     if (!rank) {
1374       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1375     } else {
1376       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1377     }
1378     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1379     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1380     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1381     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1382     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1383 
1384     /* copy over the A part */
1385     Aloc = (Mat_SeqAIJ*)aij->A->data;
1386     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1387     row  = mat->rmap->rstart;
1388     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1389     for (i=0; i<m; i++) {
1390       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1391       row++;
1392       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1393     }
1394     aj = Aloc->j;
1395     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1396 
1397     /* copy over the B part */
1398     Aloc = (Mat_SeqAIJ*)aij->B->data;
1399     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1400     row  = mat->rmap->rstart;
1401     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1402     ct   = cols;
1403     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1404     for (i=0; i<m; i++) {
1405       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1406       row++;
1407       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1408     }
1409     ierr = PetscFree(ct);CHKERRQ(ierr);
1410     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1411     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1412     /*
1413        Every process must participate in drawing the matrix since the graphics waits are
1414        synchronized across all processes that share the PetscDraw object
1415     */
1416     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1417     if (!rank) {
1418       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1419       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1420     }
1421     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1422     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1423     ierr = MatDestroy(&A);CHKERRQ(ierr);
1424   }
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatView_MPIAIJ"
1430 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1431 {
1432   PetscErrorCode ierr;
1433   PetscBool      iascii,isdraw,issocket,isbinary;
1434 
1435   PetscFunctionBegin;
1436   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1437   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1438   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1439   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1440   if (iascii || isdraw || isbinary || issocket) {
1441     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1442   }
1443   PetscFunctionReturn(0);
1444 }
1445 
1446 #undef __FUNCT__
1447 #define __FUNCT__ "MatSOR_MPIAIJ"
1448 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1449 {
1450   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1451   PetscErrorCode ierr;
1452   Vec            bb1 = 0;
1453   PetscBool      hasop;
1454 
1455   PetscFunctionBegin;
1456   if (flag == SOR_APPLY_UPPER) {
1457     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1458     PetscFunctionReturn(0);
1459   }
1460 
1461   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1462     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1463   }
1464 
1465   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1466     if (flag & SOR_ZERO_INITIAL_GUESS) {
1467       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1468       its--;
1469     }
1470 
1471     while (its--) {
1472       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1473       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1474 
1475       /* update rhs: bb1 = bb - B*x */
1476       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1477       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1478 
1479       /* local sweep */
1480       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1481     }
1482   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1483     if (flag & SOR_ZERO_INITIAL_GUESS) {
1484       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1485       its--;
1486     }
1487     while (its--) {
1488       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1489       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1490 
1491       /* update rhs: bb1 = bb - B*x */
1492       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1493       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1494 
1495       /* local sweep */
1496       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1497     }
1498   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1499     if (flag & SOR_ZERO_INITIAL_GUESS) {
1500       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1501       its--;
1502     }
1503     while (its--) {
1504       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1505       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1506 
1507       /* update rhs: bb1 = bb - B*x */
1508       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1509       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1510 
1511       /* local sweep */
1512       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1513     }
1514   } else if (flag & SOR_EISENSTAT) {
1515     Vec xx1;
1516 
1517     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1518     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1519 
1520     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1521     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1522     if (!mat->diag) {
1523       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1524       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1525     }
1526     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1527     if (hasop) {
1528       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1529     } else {
1530       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1531     }
1532     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1533 
1534     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1535 
1536     /* local sweep */
1537     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1538     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1539     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1540   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1541 
1542   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1543 
1544   matin->errortype = mat->A->errortype;
1545   PetscFunctionReturn(0);
1546 }
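/*
   Note (a sketch, not part of the library documentation): the "parallel" SOR above is a
   process-local relaxation.  Each outer iteration scatters x into mat->lvec, forms
   bb1 = bb - B*x for the off-process coupling, and then sweeps the local diagonal block A.
   A hedged direct call, where its/lits are iteration counts chosen by the caller and error
   checking is omitted:

     MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,its,lits,x);

   The same kernel is normally exercised through PCSOR inside a KSP solve.
*/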
1547 
1548 #undef __FUNCT__
1549 #define __FUNCT__ "MatPermute_MPIAIJ"
1550 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1551 {
1552   Mat            aA,aB,Aperm;
1553   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1554   PetscScalar    *aa,*ba;
1555   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1556   PetscSF        rowsf,sf;
1557   IS             parcolp = NULL;
1558   PetscBool      done;
1559   PetscErrorCode ierr;
1560 
1561   PetscFunctionBegin;
1562   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1563   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1564   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1565   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1566 
1567   /* Invert row permutation to find out where my rows should go */
1568   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1569   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1570   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1571   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1572   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1573   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1574 
1575   /* Invert column permutation to find out where my columns should go */
1576   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1577   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1578   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1579   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1580   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1581   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1582   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1583 
1584   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1585   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1586   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1587 
1588   /* Find out where my gcols should go */
1589   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1590   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1591   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1592   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1593   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1594   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1595   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1596   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1597 
1598   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1599   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1600   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1601   for (i=0; i<m; i++) {
1602     PetscInt row = rdest[i],rowner;
1603     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1604     for (j=ai[i]; j<ai[i+1]; j++) {
1605       PetscInt cowner,col = cdest[aj[j]];
1606       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1607       if (rowner == cowner) dnnz[i]++;
1608       else onnz[i]++;
1609     }
1610     for (j=bi[i]; j<bi[i+1]; j++) {
1611       PetscInt cowner,col = gcdest[bj[j]];
1612       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1613       if (rowner == cowner) dnnz[i]++;
1614       else onnz[i]++;
1615     }
1616   }
1617   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1618   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1619   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1620   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1621   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1622 
1623   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1624   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1625   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1626   for (i=0; i<m; i++) {
1627     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1628     PetscInt j0,rowlen;
1629     rowlen = ai[i+1] - ai[i];
1630     for (j0=j=0; j<rowlen; j0=j) { /* rowlen may exceed m, the length of the repurposed buffers, so insert in batches of at most m */
1631       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1632       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1633     }
1634     rowlen = bi[i+1] - bi[i];
1635     for (j0=j=0; j<rowlen; j0=j) {
1636       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1637       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1638     }
1639   }
1640   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1641   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1642   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1643   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1644   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1645   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1646   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1647   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1648   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1649   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1650   *B = Aperm;
1651   PetscFunctionReturn(0);
1652 }
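/*
   Usage sketch (assumed calling code, error checking omitted): MatPermute() returns a new
   matrix with the rows and columns of A permuted; for MATMPIAIJ the routine above computes
   a fresh preallocation through PetscSF before inserting the permuted values.

     IS  rowperm,colperm;   // nlocal, rowidx and colidx are caller-provided (assumed here)
     Mat Bperm;
     ISCreateGeneral(PETSC_COMM_WORLD,nlocal,rowidx,PETSC_COPY_VALUES,&rowperm);
     ISCreateGeneral(PETSC_COMM_WORLD,nlocal,colidx,PETSC_COPY_VALUES,&colperm);
     MatPermute(A,rowperm,colperm,&Bperm);
*/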
1653 
1654 #undef __FUNCT__
1655 #define __FUNCT__ "MatGetGhosts_MPIAIJ"
1656 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1657 {
1658   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1659   PetscErrorCode ierr;
1660 
1661   PetscFunctionBegin;
1662   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1663   if (ghosts) *ghosts = aij->garray;
1664   PetscFunctionReturn(0);
1665 }
1666 
1667 #undef __FUNCT__
1668 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1669 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1670 {
1671   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1672   Mat            A    = mat->A,B = mat->B;
1673   PetscErrorCode ierr;
1674   PetscReal      isend[5],irecv[5];
1675 
1676   PetscFunctionBegin;
1677   info->block_size = 1.0;
1678   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1679 
1680   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1681   isend[3] = info->memory;  isend[4] = info->mallocs;
1682 
1683   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1684 
1685   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1686   isend[3] += info->memory;  isend[4] += info->mallocs;
1687   if (flag == MAT_LOCAL) {
1688     info->nz_used      = isend[0];
1689     info->nz_allocated = isend[1];
1690     info->nz_unneeded  = isend[2];
1691     info->memory       = isend[3];
1692     info->mallocs      = isend[4];
1693   } else if (flag == MAT_GLOBAL_MAX) {
1694     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1695 
1696     info->nz_used      = irecv[0];
1697     info->nz_allocated = irecv[1];
1698     info->nz_unneeded  = irecv[2];
1699     info->memory       = irecv[3];
1700     info->mallocs      = irecv[4];
1701   } else if (flag == MAT_GLOBAL_SUM) {
1702     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1703 
1704     info->nz_used      = irecv[0];
1705     info->nz_allocated = irecv[1];
1706     info->nz_unneeded  = irecv[2];
1707     info->memory       = irecv[3];
1708     info->mallocs      = irecv[4];
1709   }
1710   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1711   info->fill_ratio_needed = 0;
1712   info->factor_mallocs    = 0;
1713   PetscFunctionReturn(0);
1714 }
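/*
   Usage sketch (assumed calling code, error checking omitted): MatGetInfo() reduces the
   per-process data gathered above according to the flag (MAT_LOCAL, MAT_GLOBAL_MAX or
   MAT_GLOBAL_SUM).

     MatInfo info;
     MatGetInfo(A,MAT_GLOBAL_SUM,&info);
     PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g\n",info.nz_used,info.nz_allocated);
*/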
1715 
1716 #undef __FUNCT__
1717 #define __FUNCT__ "MatSetOption_MPIAIJ"
1718 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1719 {
1720   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1721   PetscErrorCode ierr;
1722 
1723   PetscFunctionBegin;
1724   switch (op) {
1725   case MAT_NEW_NONZERO_LOCATIONS:
1726   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1727   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1728   case MAT_KEEP_NONZERO_PATTERN:
1729   case MAT_NEW_NONZERO_LOCATION_ERR:
1730   case MAT_USE_INODES:
1731   case MAT_IGNORE_ZERO_ENTRIES:
1732     MatCheckPreallocated(A,1);
1733     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1734     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1735     break;
1736   case MAT_ROW_ORIENTED:
1737     MatCheckPreallocated(A,1);
1738     a->roworiented = flg;
1739 
1740     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1741     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1742     break;
1743   case MAT_NEW_DIAGONALS:
1744     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1745     break;
1746   case MAT_IGNORE_OFF_PROC_ENTRIES:
1747     a->donotstash = flg;
1748     break;
1749   case MAT_SPD:
1750     A->spd_set = PETSC_TRUE;
1751     A->spd     = flg;
1752     if (flg) {
1753       A->symmetric                  = PETSC_TRUE;
1754       A->structurally_symmetric     = PETSC_TRUE;
1755       A->symmetric_set              = PETSC_TRUE;
1756       A->structurally_symmetric_set = PETSC_TRUE;
1757     }
1758     break;
1759   case MAT_SYMMETRIC:
1760     MatCheckPreallocated(A,1);
1761     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1762     break;
1763   case MAT_STRUCTURALLY_SYMMETRIC:
1764     MatCheckPreallocated(A,1);
1765     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1766     break;
1767   case MAT_HERMITIAN:
1768     MatCheckPreallocated(A,1);
1769     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1770     break;
1771   case MAT_SYMMETRY_ETERNAL:
1772     MatCheckPreallocated(A,1);
1773     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1774     break;
1775   default:
1776     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1777   }
1778   PetscFunctionReturn(0);
1779 }
1780 
1781 #undef __FUNCT__
1782 #define __FUNCT__ "MatGetRow_MPIAIJ"
1783 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1784 {
1785   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1786   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1787   PetscErrorCode ierr;
1788   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1789   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1790   PetscInt       *cmap,*idx_p;
1791 
1792   PetscFunctionBegin;
1793   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1794   mat->getrowactive = PETSC_TRUE;
1795 
1796   if (!mat->rowvalues && (idx || v)) {
1797     /*
1798         allocate enough space to hold information from the longest row.
1799     */
1800     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1801     PetscInt   max = 1,tmp;
1802     for (i=0; i<matin->rmap->n; i++) {
1803       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1804       if (max < tmp) max = tmp;
1805     }
1806     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1807   }
1808 
1809   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1810   lrow = row - rstart;
1811 
1812   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1813   if (!v)   {pvA = 0; pvB = 0;}
1814   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1815   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1816   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1817   nztot = nzA + nzB;
1818 
1819   cmap = mat->garray;
1820   if (v  || idx) {
1821     if (nztot) {
1822       /* Sort by increasing column numbers, assuming A and B already sorted */
1823       PetscInt imark = -1;
1824       if (v) {
1825         *v = v_p = mat->rowvalues;
1826         for (i=0; i<nzB; i++) {
1827           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1828           else break;
1829         }
1830         imark = i;
1831         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1832         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1833       }
1834       if (idx) {
1835         *idx = idx_p = mat->rowindices;
1836         if (imark > -1) {
1837           for (i=0; i<imark; i++) {
1838             idx_p[i] = cmap[cworkB[i]];
1839           }
1840         } else {
1841           for (i=0; i<nzB; i++) {
1842             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1843             else break;
1844           }
1845           imark = i;
1846         }
1847         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1848         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1849       }
1850     } else {
1851       if (idx) *idx = 0;
1852       if (v)   *v   = 0;
1853     }
1854   }
1855   *nz  = nztot;
1856   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1857   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1858   PetscFunctionReturn(0);
1859 }
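/*
   Usage sketch (assumed calling code, error checking omitted): MatGetRow() on a MATMPIAIJ
   returns the row merged from the diagonal (A) and off-diagonal (B) parts with global column
   indices in increasing order, and is valid only for locally owned rows.

     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     MatGetOwnershipRange(A,&rstart,&rend);
     for (row=rstart; row<rend; row++) {
       MatGetRow(A,row,&ncols,&cols,&vals);
       // ... use cols[] and vals[] ...
       MatRestoreRow(A,row,&ncols,&cols,&vals);
     }
*/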
1860 
1861 #undef __FUNCT__
1862 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1863 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1864 {
1865   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1866 
1867   PetscFunctionBegin;
1868   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1869   aij->getrowactive = PETSC_FALSE;
1870   PetscFunctionReturn(0);
1871 }
1872 
1873 #undef __FUNCT__
1874 #define __FUNCT__ "MatNorm_MPIAIJ"
1875 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1876 {
1877   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1878   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1879   PetscErrorCode ierr;
1880   PetscInt       i,j,cstart = mat->cmap->rstart;
1881   PetscReal      sum = 0.0;
1882   MatScalar      *v;
1883 
1884   PetscFunctionBegin;
1885   if (aij->size == 1) {
1886     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1887   } else {
1888     if (type == NORM_FROBENIUS) {
1889       v = amat->a;
1890       for (i=0; i<amat->nz; i++) {
1891         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1892       }
1893       v = bmat->a;
1894       for (i=0; i<bmat->nz; i++) {
1895         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1896       }
1897       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1898       *norm = PetscSqrtReal(*norm);
1899       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1900     } else if (type == NORM_1) { /* max column norm */
1901       PetscReal *tmp,*tmp2;
1902       PetscInt  *jj,*garray = aij->garray;
1903       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1904       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1905       *norm = 0.0;
1906       v     = amat->a; jj = amat->j;
1907       for (j=0; j<amat->nz; j++) {
1908         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1909       }
1910       v = bmat->a; jj = bmat->j;
1911       for (j=0; j<bmat->nz; j++) {
1912         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1913       }
1914       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1915       for (j=0; j<mat->cmap->N; j++) {
1916         if (tmp2[j] > *norm) *norm = tmp2[j];
1917       }
1918       ierr = PetscFree(tmp);CHKERRQ(ierr);
1919       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1920       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1921     } else if (type == NORM_INFINITY) { /* max row norm */
1922       PetscReal ntemp = 0.0;
1923       for (j=0; j<aij->A->rmap->n; j++) {
1924         v   = amat->a + amat->i[j];
1925         sum = 0.0;
1926         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1927           sum += PetscAbsScalar(*v); v++;
1928         }
1929         v = bmat->a + bmat->i[j];
1930         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1931           sum += PetscAbsScalar(*v); v++;
1932         }
1933         if (sum > ntemp) ntemp = sum;
1934       }
1935       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1936       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1937     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1938   }
1939   PetscFunctionReturn(0);
1940 }
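/*
   Usage sketch (assumed calling code, error checking omitted): only NORM_1, NORM_FROBENIUS
   and NORM_INFINITY are supported by the parallel implementation above.

     PetscReal nrm1,nrmf,nrmi;
     MatNorm(A,NORM_1,&nrm1);          // largest column sum of absolute values
     MatNorm(A,NORM_FROBENIUS,&nrmf);  // square root of the sum of squares of all entries
     MatNorm(A,NORM_INFINITY,&nrmi);   // largest row sum of absolute values
*/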
1941 
1942 #undef __FUNCT__
1943 #define __FUNCT__ "MatTranspose_MPIAIJ"
1944 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1945 {
1946   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1947   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1948   PetscErrorCode ierr;
1949   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1950   PetscInt       cstart = A->cmap->rstart,ncol;
1951   Mat            B;
1952   MatScalar      *array;
1953 
1954   PetscFunctionBegin;
1955   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1956 
1957   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1958   ai = Aloc->i; aj = Aloc->j;
1959   bi = Bloc->i; bj = Bloc->j;
1960   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1961     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1962     PetscSFNode          *oloc;
1963     PETSC_UNUSED PetscSF sf;
1964 
1965     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1966     /* compute d_nnz for preallocation */
1967     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1968     for (i=0; i<ai[ma]; i++) {
1969       d_nnz[aj[i]]++;
1970       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1971     }
1972     /* compute local off-diagonal contributions */
1973     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1974     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1975     /* map those to global */
1976     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1977     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1978     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1979     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1980     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1981     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1982     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1983 
1984     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1985     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1986     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1987     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1988     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1989     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1990   } else {
1991     B    = *matout;
1992     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1993     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1994   }
1995 
1996   /* copy over the A part */
1997   array = Aloc->a;
1998   row   = A->rmap->rstart;
1999   for (i=0; i<ma; i++) {
2000     ncol = ai[i+1]-ai[i];
2001     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2002     row++;
2003     array += ncol; aj += ncol;
2004   }
2005   aj = Aloc->j;
2006   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2007 
2008   /* copy over the B part */
2009   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2010   array = Bloc->a;
2011   row   = A->rmap->rstart;
2012   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2013   cols_tmp = cols;
2014   for (i=0; i<mb; i++) {
2015     ncol = bi[i+1]-bi[i];
2016     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2017     row++;
2018     array += ncol; cols_tmp += ncol;
2019   }
2020   ierr = PetscFree(cols);CHKERRQ(ierr);
2021 
2022   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2023   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2024   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2025     *matout = B;
2026   } else {
2027     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2028   }
2029   PetscFunctionReturn(0);
2030 }
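/*
   Usage sketch (assumed calling code, error checking omitted):

     Mat At;
     MatTranspose(A,MAT_INITIAL_MATRIX,&At);   // new matrix, preallocation computed above
     MatTranspose(A,MAT_REUSE_MATRIX,&A);      // in-place form; square matrices only, see the check above
*/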
2031 
2032 #undef __FUNCT__
2033 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2034 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2035 {
2036   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2037   Mat            a    = aij->A,b = aij->B;
2038   PetscErrorCode ierr;
2039   PetscInt       s1,s2,s3;
2040 
2041   PetscFunctionBegin;
2042   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2043   if (rr) {
2044     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2045     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2046     /* Overlap communication with computation. */
2047     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2048   }
2049   if (ll) {
2050     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2051     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2052     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2053   }
2054   /* scale the diagonal block */
2055   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2056 
2057   if (rr) {
2058     /* Do a scatter end and then right scale the off-diagonal block */
2059     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2060     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2061   }
2062   PetscFunctionReturn(0);
2063 }
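/*
   Usage sketch (assumed calling code, error checking omitted): MatDiagonalScale() computes
   A <- diag(ll)*A*diag(rr); either vector may be NULL to skip that side.

     MatDiagonalScale(A,l,r);        // l matches the row layout of A, r the column layout
     MatDiagonalScale(A,NULL,r);     // right scaling only
*/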
2064 
2065 #undef __FUNCT__
2066 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2067 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2068 {
2069   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2070   PetscErrorCode ierr;
2071 
2072   PetscFunctionBegin;
2073   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2074   PetscFunctionReturn(0);
2075 }
2076 
2077 #undef __FUNCT__
2078 #define __FUNCT__ "MatEqual_MPIAIJ"
2079 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2080 {
2081   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2082   Mat            a,b,c,d;
2083   PetscBool      flg;
2084   PetscErrorCode ierr;
2085 
2086   PetscFunctionBegin;
2087   a = matA->A; b = matA->B;
2088   c = matB->A; d = matB->B;
2089 
2090   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2091   if (flg) {
2092     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2093   }
2094   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2095   PetscFunctionReturn(0);
2096 }
2097 
2098 #undef __FUNCT__
2099 #define __FUNCT__ "MatCopy_MPIAIJ"
2100 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2101 {
2102   PetscErrorCode ierr;
2103   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2104   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2105 
2106   PetscFunctionBegin;
2107   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2108   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2109     /* Because of the column compression in the off-process part of the matrix a->B,
2110        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
2111        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
2112        could be provided by first uncompressing the a->B matrices and then copying the
2113        submatrices */
2114     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2115   } else {
2116     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2117     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2118   }
2119   PetscFunctionReturn(0);
2120 }
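/*
   Usage sketch (assumed calling code, error checking omitted): the fast path above is taken
   only when the caller passes SAME_NONZERO_PATTERN and both matrices share the same copy
   implementation; otherwise values are reinserted through MatCopy_Basic().

     MatCopy(A,B,SAME_NONZERO_PATTERN);        // copies a->A/a->B directly into b->A/b->B
     MatCopy(A,B,DIFFERENT_NONZERO_PATTERN);   // general path via MatCopy_Basic()
*/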
2121 
2122 #undef __FUNCT__
2123 #define __FUNCT__ "MatSetUp_MPIAIJ"
2124 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2125 {
2126   PetscErrorCode ierr;
2127 
2128   PetscFunctionBegin;
2129   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2130   PetscFunctionReturn(0);
2131 }
2132 
2133 /*
2134    Computes the number of nonzeros per row needed for preallocation when X and Y
2135    have different nonzero structure.
2136 */
2137 #undef __FUNCT__
2138 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2139 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2140 {
2141   PetscInt       i,j,k,nzx,nzy;
2142 
2143   PetscFunctionBegin;
2144   /* Set the number of nonzeros in the new matrix */
2145   for (i=0; i<m; i++) {
2146     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2147     nzx = xi[i+1] - xi[i];
2148     nzy = yi[i+1] - yi[i];
2149     nnz[i] = 0;
2150     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2151       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2152       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2153       nnz[i]++;
2154     }
2155     for (; k<nzy; k++) nnz[i]++;
2156   }
2157   PetscFunctionReturn(0);
2158 }
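/*
   Worked example (illustration only): with one row, X columns {0,3,7} and Y columns {3,5}
   (both in the global numbering given by xltog/yltog), the merge loop counts 0 and 7 from X,
   the shared column 3 once, and the trailing 5 from Y, giving nnz[0] = 4, the size of the
   union of the two sorted column lists.
*/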
2159 
2160 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2161 #undef __FUNCT__
2162 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2163 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2164 {
2165   PetscErrorCode ierr;
2166   PetscInt       m = Y->rmap->N;
2167   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2168   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2169 
2170   PetscFunctionBegin;
2171   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2172   PetscFunctionReturn(0);
2173 }
2174 
2175 #undef __FUNCT__
2176 #define __FUNCT__ "MatAXPY_MPIAIJ"
2177 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2178 {
2179   PetscErrorCode ierr;
2180   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2181   PetscBLASInt   bnz,one=1;
2182   Mat_SeqAIJ     *x,*y;
2183 
2184   PetscFunctionBegin;
2185   if (str == SAME_NONZERO_PATTERN) {
2186     PetscScalar alpha = a;
2187     x    = (Mat_SeqAIJ*)xx->A->data;
2188     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2189     y    = (Mat_SeqAIJ*)yy->A->data;
2190     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2191     x    = (Mat_SeqAIJ*)xx->B->data;
2192     y    = (Mat_SeqAIJ*)yy->B->data;
2193     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2194     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2195     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2196   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzero pattern of X is a subset of Y's */
2197     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2198   } else {
2199     Mat      B;
2200     PetscInt *nnz_d,*nnz_o;
2201     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2202     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2203     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2204     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2205     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2206     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2207     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2208     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2209     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2210     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2211     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2212     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2213     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2214     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2215   }
2216   PetscFunctionReturn(0);
2217 }
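/*
   Usage sketch (assumed calling code, error checking omitted): Y <- a*X + Y.  The structure
   flag selects the path above: SAME_NONZERO_PATTERN uses one BLAS axpy per block,
   SUBSET_NONZERO_PATTERN falls back to MatAXPY_Basic(), and anything else rebuilds Y with the
   merged preallocation computed by the helpers above.

     MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);
*/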
2218 
2219 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2220 
2221 #undef __FUNCT__
2222 #define __FUNCT__ "MatConjugate_MPIAIJ"
2223 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2224 {
2225 #if defined(PETSC_USE_COMPLEX)
2226   PetscErrorCode ierr;
2227   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2228 
2229   PetscFunctionBegin;
2230   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2231   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2232 #else
2233   PetscFunctionBegin;
2234 #endif
2235   PetscFunctionReturn(0);
2236 }
2237 
2238 #undef __FUNCT__
2239 #define __FUNCT__ "MatRealPart_MPIAIJ"
2240 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2241 {
2242   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2243   PetscErrorCode ierr;
2244 
2245   PetscFunctionBegin;
2246   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2247   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2248   PetscFunctionReturn(0);
2249 }
2250 
2251 #undef __FUNCT__
2252 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2253 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2254 {
2255   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2256   PetscErrorCode ierr;
2257 
2258   PetscFunctionBegin;
2259   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2260   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2261   PetscFunctionReturn(0);
2262 }
2263 
2264 #undef __FUNCT__
2265 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2266 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2267 {
2268   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2269   PetscErrorCode ierr;
2270   PetscInt       i,*idxb = 0;
2271   PetscScalar    *va,*vb;
2272   Vec            vtmp;
2273 
2274   PetscFunctionBegin;
2275   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2276   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2277   if (idx) {
2278     for (i=0; i<A->rmap->n; i++) {
2279       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2280     }
2281   }
2282 
2283   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2284   if (idx) {
2285     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2286   }
2287   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2288   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2289 
2290   for (i=0; i<A->rmap->n; i++) {
2291     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2292       va[i] = vb[i];
2293       if (idx) idx[i] = a->garray[idxb[i]];
2294     }
2295   }
2296 
2297   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2298   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2299   ierr = PetscFree(idxb);CHKERRQ(ierr);
2300   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2301   PetscFunctionReturn(0);
2302 }
2303 
2304 #undef __FUNCT__
2305 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2306 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2307 {
2308   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2309   PetscErrorCode ierr;
2310   PetscInt       i,*idxb = 0;
2311   PetscScalar    *va,*vb;
2312   Vec            vtmp;
2313 
2314   PetscFunctionBegin;
2315   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2316   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2317   if (idx) {
2318     for (i=0; i<A->rmap->n; i++) {
2319       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2320     }
2321   }
2322 
2323   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2324   if (idx) {
2325     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2326   }
2327   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2328   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2329 
2330   for (i=0; i<A->rmap->n; i++) {
2331     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2332       va[i] = vb[i];
2333       if (idx) idx[i] = a->garray[idxb[i]];
2334     }
2335   }
2336 
2337   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2338   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2339   ierr = PetscFree(idxb);CHKERRQ(ierr);
2340   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2341   PetscFunctionReturn(0);
2342 }
2343 
2344 #undef __FUNCT__
2345 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2346 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2347 {
2348   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2349   PetscInt       n      = A->rmap->n;
2350   PetscInt       cstart = A->cmap->rstart;
2351   PetscInt       *cmap  = mat->garray;
2352   PetscInt       *diagIdx, *offdiagIdx;
2353   Vec            diagV, offdiagV;
2354   PetscScalar    *a, *diagA, *offdiagA;
2355   PetscInt       r;
2356   PetscErrorCode ierr;
2357 
2358   PetscFunctionBegin;
2359   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2360   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2361   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2362   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2363   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2364   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2365   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2366   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2367   for (r = 0; r < n; ++r) {
2368     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2369       a[r]   = diagA[r];
2370       idx[r] = cstart + diagIdx[r];
2371     } else {
2372       a[r]   = offdiagA[r];
2373       idx[r] = cmap[offdiagIdx[r]];
2374     }
2375   }
2376   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2377   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2378   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2379   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2380   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2381   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2382   PetscFunctionReturn(0);
2383 }
2384 
2385 #undef __FUNCT__
2386 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2387 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2388 {
2389   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2390   PetscInt       n      = A->rmap->n;
2391   PetscInt       cstart = A->cmap->rstart;
2392   PetscInt       *cmap  = mat->garray;
2393   PetscInt       *diagIdx, *offdiagIdx;
2394   Vec            diagV, offdiagV;
2395   PetscScalar    *a, *diagA, *offdiagA;
2396   PetscInt       r;
2397   PetscErrorCode ierr;
2398 
2399   PetscFunctionBegin;
2400   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2401   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2402   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2403   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2404   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2405   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2406   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2407   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2408   for (r = 0; r < n; ++r) {
2409     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2410       a[r]   = diagA[r];
2411       idx[r] = cstart + diagIdx[r];
2412     } else {
2413       a[r]   = offdiagA[r];
2414       idx[r] = cmap[offdiagIdx[r]];
2415     }
2416   }
2417   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2418   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2419   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2420   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2421   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2422   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2423   PetscFunctionReturn(0);
2424 }
2425 
2426 #undef __FUNCT__
2427 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2428 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2429 {
2430   PetscErrorCode ierr;
2431   Mat            *dummy;
2432 
2433   PetscFunctionBegin;
2434   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2435   *newmat = *dummy;
2436   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2437   PetscFunctionReturn(0);
2438 }
2439 
2440 #undef __FUNCT__
2441 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2442 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2443 {
2444   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2445   PetscErrorCode ierr;
2446 
2447   PetscFunctionBegin;
2448   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2449   A->errortype = a->A->errortype;
2450   PetscFunctionReturn(0);
2451 }
2452 
2453 #undef __FUNCT__
2454 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2455 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2456 {
2457   PetscErrorCode ierr;
2458   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2459 
2460   PetscFunctionBegin;
2461   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2462   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2463   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2464   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2465   PetscFunctionReturn(0);
2466 }
2467 
2468 #undef __FUNCT__
2469 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ"
2470 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2471 {
2472   PetscFunctionBegin;
2473   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2474   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2475   PetscFunctionReturn(0);
2476 }
2477 
2478 #undef __FUNCT__
2479 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap"
2480 /*@
2481    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2482 
2483    Collective on Mat
2484 
2485    Input Parameters:
2486 +    A - the matrix
2487 -    sc - PETSC_TRUE to use the scalable algorithm (by default the scalable algorithm is not used)
2488 
2489    Level: advanced
2490 
2491 @*/
2492 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2493 {
2494   PetscErrorCode       ierr;
2495 
2496   PetscFunctionBegin;
2497   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2498   PetscFunctionReturn(0);
2499 }
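/*
   Usage sketch (assumed calling code, error checking omitted): opt into the scalable overlap
   algorithm before calling MatIncreaseOverlap(), either programmatically or with the option
   -mat_increase_overlap_scalable handled in MatSetFromOptions_MPIAIJ() below.

     MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);
     MatIncreaseOverlap(A,nis,islist,overlap);
*/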
2500 
2501 #undef __FUNCT__
2502 #define __FUNCT__ "MatSetFromOptions_MPIAIJ"
2503 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2504 {
2505   PetscErrorCode       ierr;
2506   PetscBool            sc = PETSC_FALSE,flg;
2507 
2508   PetscFunctionBegin;
2509   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2510   ierr = PetscObjectOptionsBegin((PetscObject)A);
2511     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2512     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2513     if (flg) {
2514       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2515     }
2516   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2517   PetscFunctionReturn(0);
2518 }
2519 
2520 #undef __FUNCT__
2521 #define __FUNCT__ "MatShift_MPIAIJ"
2522 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2523 {
2524   PetscErrorCode ierr;
2525   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2526   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2527 
2528   PetscFunctionBegin;
2529   if (!Y->preallocated) {
2530     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2531   } else if (!aij->nz) {
2532     PetscInt nonew = aij->nonew;
2533     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2534     aij->nonew = nonew;
2535   }
2536   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2537   PetscFunctionReturn(0);
2538 }
2539 
2540 #undef __FUNCT__
2541 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ"
2542 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2543 {
2544   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2545   PetscErrorCode ierr;
2546 
2547   PetscFunctionBegin;
2548   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2549   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2550   if (d) {
2551     PetscInt rstart;
2552     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2553     *d += rstart;
2554 
2555   }
2556   PetscFunctionReturn(0);
2557 }
2558 
2559 
2560 /* -------------------------------------------------------------------*/
2561 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2562                                        MatGetRow_MPIAIJ,
2563                                        MatRestoreRow_MPIAIJ,
2564                                        MatMult_MPIAIJ,
2565                                 /* 4*/ MatMultAdd_MPIAIJ,
2566                                        MatMultTranspose_MPIAIJ,
2567                                        MatMultTransposeAdd_MPIAIJ,
2568                                        0,
2569                                        0,
2570                                        0,
2571                                 /*10*/ 0,
2572                                        0,
2573                                        0,
2574                                        MatSOR_MPIAIJ,
2575                                        MatTranspose_MPIAIJ,
2576                                 /*15*/ MatGetInfo_MPIAIJ,
2577                                        MatEqual_MPIAIJ,
2578                                        MatGetDiagonal_MPIAIJ,
2579                                        MatDiagonalScale_MPIAIJ,
2580                                        MatNorm_MPIAIJ,
2581                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2582                                        MatAssemblyEnd_MPIAIJ,
2583                                        MatSetOption_MPIAIJ,
2584                                        MatZeroEntries_MPIAIJ,
2585                                 /*24*/ MatZeroRows_MPIAIJ,
2586                                        0,
2587                                        0,
2588                                        0,
2589                                        0,
2590                                 /*29*/ MatSetUp_MPIAIJ,
2591                                        0,
2592                                        0,
2593                                        0,
2594                                        0,
2595                                 /*34*/ MatDuplicate_MPIAIJ,
2596                                        0,
2597                                        0,
2598                                        0,
2599                                        0,
2600                                 /*39*/ MatAXPY_MPIAIJ,
2601                                        MatGetSubMatrices_MPIAIJ,
2602                                        MatIncreaseOverlap_MPIAIJ,
2603                                        MatGetValues_MPIAIJ,
2604                                        MatCopy_MPIAIJ,
2605                                 /*44*/ MatGetRowMax_MPIAIJ,
2606                                        MatScale_MPIAIJ,
2607                                        MatShift_MPIAIJ,
2608                                        MatDiagonalSet_MPIAIJ,
2609                                        MatZeroRowsColumns_MPIAIJ,
2610                                 /*49*/ MatSetRandom_MPIAIJ,
2611                                        0,
2612                                        0,
2613                                        0,
2614                                        0,
2615                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2616                                        0,
2617                                        MatSetUnfactored_MPIAIJ,
2618                                        MatPermute_MPIAIJ,
2619                                        0,
2620                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2621                                        MatDestroy_MPIAIJ,
2622                                        MatView_MPIAIJ,
2623                                        0,
2624                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2625                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2626                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2627                                        0,
2628                                        0,
2629                                        0,
2630                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2631                                        MatGetRowMinAbs_MPIAIJ,
2632                                        0,
2633                                        MatSetColoring_MPIAIJ,
2634                                        0,
2635                                        MatSetValuesAdifor_MPIAIJ,
2636                                 /*75*/ MatFDColoringApply_AIJ,
2637                                        MatSetFromOptions_MPIAIJ,
2638                                        0,
2639                                        0,
2640                                        MatFindZeroDiagonals_MPIAIJ,
2641                                 /*80*/ 0,
2642                                        0,
2643                                        0,
2644                                 /*83*/ MatLoad_MPIAIJ,
2645                                        0,
2646                                        0,
2647                                        0,
2648                                        0,
2649                                        0,
2650                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2651                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2652                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2653                                        MatPtAP_MPIAIJ_MPIAIJ,
2654                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2655                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2656                                        0,
2657                                        0,
2658                                        0,
2659                                        0,
2660                                 /*99*/ 0,
2661                                        0,
2662                                        0,
2663                                        MatConjugate_MPIAIJ,
2664                                        0,
2665                                 /*104*/MatSetValuesRow_MPIAIJ,
2666                                        MatRealPart_MPIAIJ,
2667                                        MatImaginaryPart_MPIAIJ,
2668                                        0,
2669                                        0,
2670                                 /*109*/0,
2671                                        0,
2672                                        MatGetRowMin_MPIAIJ,
2673                                        0,
2674                                        MatMissingDiagonal_MPIAIJ,
2675                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2676                                        0,
2677                                        MatGetGhosts_MPIAIJ,
2678                                        0,
2679                                        0,
2680                                 /*119*/0,
2681                                        0,
2682                                        0,
2683                                        0,
2684                                        MatGetMultiProcBlock_MPIAIJ,
2685                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2686                                        MatGetColumnNorms_MPIAIJ,
2687                                        MatInvertBlockDiagonal_MPIAIJ,
2688                                        0,
2689                                        MatGetSubMatricesMPI_MPIAIJ,
2690                                 /*129*/0,
2691                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2692                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2693                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2694                                        0,
2695                                 /*134*/0,
2696                                        0,
2697                                        0,
2698                                        0,
2699                                        0,
2700                                 /*139*/0,
2701                                        0,
2702                                        0,
2703                                        MatFDColoringSetUp_MPIXAIJ,
2704                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2705                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2706 };
2707 
2708 /* ----------------------------------------------------------------------------------------*/
2709 
2710 #undef __FUNCT__
2711 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2712 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2713 {
2714   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2715   PetscErrorCode ierr;
2716 
2717   PetscFunctionBegin;
2718   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2719   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2720   PetscFunctionReturn(0);
2721 }
2722 
2723 #undef __FUNCT__
2724 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2725 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2726 {
2727   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2728   PetscErrorCode ierr;
2729 
2730   PetscFunctionBegin;
2731   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2732   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2733   PetscFunctionReturn(0);
2734 }
2735 
2736 #undef __FUNCT__
2737 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2738 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2739 {
2740   Mat_MPIAIJ     *b;
2741   PetscErrorCode ierr;
2742 
2743   PetscFunctionBegin;
2744   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2745   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2746   b = (Mat_MPIAIJ*)B->data;
2747 
2748   if (!B->preallocated) {
2749     /* Explicitly create 2 MATSEQAIJ matrices. */
2750     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2751     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2752     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2753     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2754     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2755     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2756     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2757     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2758     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2759     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2760   }
2761 
2762   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2763   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2764   B->preallocated = PETSC_TRUE;
2765   PetscFunctionReturn(0);
2766 }
2767 
2768 #undef __FUNCT__
2769 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2770 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2771 {
2772   Mat            mat;
2773   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2774   PetscErrorCode ierr;
2775 
2776   PetscFunctionBegin;
2777   *newmat = 0;
2778   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2779   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2780   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2781   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2782   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2783   a       = (Mat_MPIAIJ*)mat->data;
2784 
2785   mat->factortype   = matin->factortype;
2786   mat->assembled    = PETSC_TRUE;
2787   mat->insertmode   = NOT_SET_VALUES;
2788   mat->preallocated = PETSC_TRUE;
2789 
2790   a->size         = oldmat->size;
2791   a->rank         = oldmat->rank;
2792   a->donotstash   = oldmat->donotstash;
2793   a->roworiented  = oldmat->roworiented;
2794   a->rowindices   = 0;
2795   a->rowvalues    = 0;
2796   a->getrowactive = PETSC_FALSE;
2797 
2798   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2799   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2800 
2801   if (oldmat->colmap) {
2802 #if defined(PETSC_USE_CTABLE)
2803     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2804 #else
2805     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2806     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2807     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2808 #endif
2809   } else a->colmap = 0;
2810   if (oldmat->garray) {
2811     PetscInt len;
2812     len  = oldmat->B->cmap->n;
2813     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2814     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2815     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2816   } else a->garray = 0;
2817 
2818   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2819   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2820   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2821   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2822   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2823   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2824   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2825   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2826   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2827   *newmat = mat;
2828   PetscFunctionReturn(0);
2829 }
2830 
2831 
2832 
2833 #undef __FUNCT__
2834 #define __FUNCT__ "MatLoad_MPIAIJ"
2835 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2836 {
2837   PetscScalar    *vals,*svals;
2838   MPI_Comm       comm;
2839   PetscErrorCode ierr;
2840   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2841   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2842   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2843   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2844   PetscInt       cend,cstart,n,*rowners;
2845   int            fd;
2846   PetscInt       bs = newMat->rmap->bs;
2847 
2848   PetscFunctionBegin;
2849   /* force binary viewer to load .info file if it has not yet done so */
2850   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2851   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2852   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2853   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2854   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2855   if (!rank) {
2856     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2857     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
2859   }
2860 
2861   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
2862   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2863   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2864   if (bs < 0) bs = 1;
2865 
2866   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2867   M    = header[1]; N = header[2];
2868 
2869   /* If global sizes are set, check if they are consistent with that given in the file */
2870   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2871   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2872 
2873   /* determine ownership of all (block) rows */
2874   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2875   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2876   else m = newMat->rmap->n; /* Set by user */
2877 
2878   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2879   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2880 
2881   /* First process needs enough room for process with most rows */
2882   if (!rank) {
2883     mmax = rowners[1];
2884     for (i=2; i<=size; i++) {
2885       mmax = PetscMax(mmax, rowners[i]);
2886     }
2887   } else mmax = -1;             /* unused, but compilers complain */
2888 
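  /* turn the gathered local row counts into a prefix sum so rowners[] holds each process's ownership range */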
2889   rowners[0] = 0;
2890   for (i=2; i<=size; i++) {
2891     rowners[i] += rowners[i-1];
2892   }
2893   rstart = rowners[rank];
2894   rend   = rowners[rank+1];
2895 
2896   /* distribute row lengths to all processors */
2897   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2898   if (!rank) {
2899     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2900     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2901     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2902     for (j=0; j<m; j++) {
2903       procsnz[0] += ourlens[j];
2904     }
2905     for (i=1; i<size; i++) {
2906       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2907       /* calculate the number of nonzeros on each processor */
2908       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2909         procsnz[i] += rowlengths[j];
2910       }
2911       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2912     }
2913     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2914   } else {
2915     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2916   }
2917 
2918   if (!rank) {
2919     /* determine max buffer needed and allocate it */
2920     maxnz = 0;
2921     for (i=0; i<size; i++) {
2922       maxnz = PetscMax(maxnz,procsnz[i]);
2923     }
2924     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2925 
2926     /* read in my part of the matrix column indices  */
2927     nz   = procsnz[0];
2928     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2929     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2930 
    /* read in everyone else's portion and ship it off */
2932     for (i=1; i<size; i++) {
2933       nz   = procsnz[i];
2934       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2935       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2936     }
2937     ierr = PetscFree(cols);CHKERRQ(ierr);
2938   } else {
2939     /* determine buffer space needed for message */
2940     nz = 0;
2941     for (i=0; i<m; i++) {
2942       nz += ourlens[i];
2943     }
2944     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2945 
2946     /* receive message of column indices*/
2947     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2948   }
2949 
2950   /* determine column ownership if matrix is not square */
2951   if (N != M) {
2952     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2953     else n = newMat->cmap->n;
2954     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2955     cstart = cend - n;
2956   } else {
2957     cstart = rstart;
2958     cend   = rend;
2959     n      = cend - cstart;
2960   }
2961 
2962   /* loop over local rows, determining number of off diagonal entries */
2963   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2964   jj   = 0;
2965   for (i=0; i<m; i++) {
2966     for (j=0; j<ourlens[i]; j++) {
2967       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2968       jj++;
2969     }
2970   }
2971 
2972   for (i=0; i<m; i++) {
2973     ourlens[i] -= offlens[i];
2974   }
2975   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2976 
2977   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2978 
2979   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2980 
2981   for (i=0; i<m; i++) {
2982     ourlens[i] += offlens[i];
2983   }
2984 
2985   if (!rank) {
2986     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2987 
2988     /* read in my part of the matrix numerical values  */
2989     nz   = procsnz[0];
2990     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2991 
2992     /* insert into matrix */
2993     jj      = rstart;
2994     smycols = mycols;
2995     svals   = vals;
2996     for (i=0; i<m; i++) {
2997       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2998       smycols += ourlens[i];
2999       svals   += ourlens[i];
3000       jj++;
3001     }
3002 
3003     /* read in other processors and ship out */
3004     for (i=1; i<size; i++) {
3005       nz   = procsnz[i];
3006       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3007       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3008     }
3009     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3010   } else {
3011     /* receive numeric values */
3012     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3013 
3014     /* receive message of values*/
3015     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3016 
3017     /* insert into matrix */
3018     jj      = rstart;
3019     smycols = mycols;
3020     svals   = vals;
3021     for (i=0; i<m; i++) {
3022       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3023       smycols += ourlens[i];
3024       svals   += ourlens[i];
3025       jj++;
3026     }
3027   }
3028   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3029   ierr = PetscFree(vals);CHKERRQ(ierr);
3030   ierr = PetscFree(mycols);CHKERRQ(ierr);
3031   ierr = PetscFree(rowners);CHKERRQ(ierr);
3032   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3033   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3034   PetscFunctionReturn(0);
3035 }
3036 
3037 #undef __FUNCT__
3038 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3039 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */
3040 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3041 {
3042   PetscErrorCode ierr;
3043   IS             iscol_local;
3044   PetscInt       csize;
3045 
3046   PetscFunctionBegin;
3047   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3048   if (call == MAT_REUSE_MATRIX) {
3049     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3050     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3051   } else {
3052     /* check if we are grabbing all columns*/
3053     PetscBool    isstride;
3054     PetscMPIInt  lisstride = 0,gisstride;
3055     ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3056     if (isstride) {
3057       PetscInt  start,len,mstart,mlen;
3058       ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3059       ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3060       ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3061       if (mstart == start && mlen-mstart == len) lisstride = 1;
3062     }
3063     ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3064     if (gisstride) {
3065       PetscInt N;
3066       ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3067       ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3068       ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3069       ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3070     } else {
3071       PetscInt cbs;
3072       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3073       ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3074       ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3075     }
3076   }
3077   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3078   if (call == MAT_INITIAL_MATRIX) {
3079     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3080     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3081   }
3082   PetscFunctionReturn(0);
3083 }
3084 
3085 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3086 #undef __FUNCT__
3087 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3088 /*
    Not great since it makes two copies of the submatrix: first a local SeqAIJ
  on each process, and then the final result obtained by concatenating the local matrices.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3092 
3093   Note: This requires a sequential iscol with all indices.
3094 */
3095 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3096 {
3097   PetscErrorCode ierr;
3098   PetscMPIInt    rank,size;
3099   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3100   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3101   PetscBool      allcolumns, colflag;
3102   Mat            M,Mreuse;
3103   MatScalar      *vwork,*aa;
3104   MPI_Comm       comm;
3105   Mat_SeqAIJ     *aij;
3106 
3107   PetscFunctionBegin;
3108   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3109   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3110   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3111 
3112   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3113   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3114   if (colflag && ncol == mat->cmap->N) {
3115     allcolumns = PETSC_TRUE;
3116     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr);
3117   } else {
3118     allcolumns = PETSC_FALSE;
3119   }
3120   if (call ==  MAT_REUSE_MATRIX) {
3121     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3122     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3123     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3124   } else {
3125     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3126   }
3127 
3128   /*
3129       m - number of local rows
3130       n - number of columns (same on all processors)
3131       rstart - first row in new global matrix generated
3132   */
3133   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3134   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3135   if (call == MAT_INITIAL_MATRIX) {
3136     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3137     ii  = aij->i;
3138     jj  = aij->j;
3139 
3140     /*
3141         Determine the number of non-zeros in the diagonal and off-diagonal
3142         portions of the matrix in order to do correct preallocation
3143     */
3144 
3145     /* first get start and end of "diagonal" columns */
3146     if (csize == PETSC_DECIDE) {
3147       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3148       if (mglobal == n) { /* square matrix */
3149         nlocal = m;
3150       } else {
3151         nlocal = n/size + ((n % size) > rank);
3152       }
3153     } else {
3154       nlocal = csize;
3155     }
3156     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3157     rstart = rend - nlocal;
3158     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3159 
3160     /* next, compute all the lengths */
3161     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3162     olens = dlens + m;
3163     for (i=0; i<m; i++) {
3164       jend = ii[i+1] - ii[i];
3165       olen = 0;
3166       dlen = 0;
3167       for (j=0; j<jend; j++) {
3168         if (*jj < rstart || *jj >= rend) olen++;
3169         else dlen++;
3170         jj++;
3171       }
3172       olens[i] = olen;
3173       dlens[i] = dlen;
3174     }
3175     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3176     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3177     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3178     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3179     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3180     ierr = PetscFree(dlens);CHKERRQ(ierr);
3181   } else {
3182     PetscInt ml,nl;
3183 
3184     M    = *newmat;
3185     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3186     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3187     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3188     /*
3189          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3190        rather than the slower MatSetValues().
3191     */
3192     M->was_assembled = PETSC_TRUE;
3193     M->assembled     = PETSC_FALSE;
3194   }
3195   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3196   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3197   ii   = aij->i;
3198   jj   = aij->j;
3199   aa   = aij->a;
3200   for (i=0; i<m; i++) {
3201     row   = rstart + i;
3202     nz    = ii[i+1] - ii[i];
3203     cwork = jj;     jj += nz;
3204     vwork = aa;     aa += nz;
3205     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3206   }
3207 
3208   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3209   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3210   *newmat = M;
3211 
3212   /* save submatrix used in processor for next request */
3213   if (call ==  MAT_INITIAL_MATRIX) {
3214     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3215     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3216   }
3217   PetscFunctionReturn(0);
3218 }
3219 
3220 #undef __FUNCT__
3221 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3222 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3223 {
3224   PetscInt       m,cstart, cend,j,nnz,i,d;
3225   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3226   const PetscInt *JJ;
3227   PetscScalar    *values;
3228   PetscErrorCode ierr;
3229 
3230   PetscFunctionBegin;
3231   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3232 
3233   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3234   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3235   m      = B->rmap->n;
3236   cstart = B->cmap->rstart;
3237   cend   = B->cmap->rend;
3238   rstart = B->rmap->rstart;
3239 
3240   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3241 
#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with a negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3249   }
3250 #endif
3251 
3252   for (i=0; i<m; i++) {
3253     nnz     = Ii[i+1]- Ii[i];
3254     JJ      = J + Ii[i];
3255     nnz_max = PetscMax(nnz_max,nnz);
3256     d       = 0;
3257     for (j=0; j<nnz; j++) {
3258       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3259     }
3260     d_nnz[i] = d;
3261     o_nnz[i] = nnz - d;
3262   }
3263   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3264   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3265 
3266   if (v) values = (PetscScalar*)v;
3267   else {
3268     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3269   }
3270 
3271   for (i=0; i<m; i++) {
3272     ii   = i + rstart;
3273     nnz  = Ii[i+1]- Ii[i];
3274     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3275   }
3276   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3277   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3278 
3279   if (!v) {
3280     ierr = PetscFree(values);CHKERRQ(ierr);
3281   }
3282   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3283   PetscFunctionReturn(0);
3284 }
3285 
3286 #undef __FUNCT__
3287 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3288 /*@
3289    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3290    (the default parallel PETSc format).
3291 
3292    Collective on MPI_Comm
3293 
3294    Input Parameters:
3295 +  B - the matrix
3296 .  i - the indices into j for the start of each local row (starts with zero)
3297 .  j - the column indices for each local row (starts with zero)
3298 -  v - optional values in the matrix
3299 
3300    Level: developer
3301 
3302    Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3306 
3307        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3308 
       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix the expected input data is
    as shown below:
3312 
3313 $        1 0 0
3314 $        2 0 3     P0
3315 $       -------
3316 $        4 5 6     P1
3317 $
3318 $     Process0 [P0]: rows_owned=[0,1]
3319 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3320 $        j =  {0,0,2}  [size = 3]
3321 $        v =  {1,2,3}  [size = 3]
3322 $
3323 $     Process1 [P1]: rows_owned=[2]
3324 $        i =  {0,3}    [size = nrow+1  = 1+1]
3325 $        j =  {0,1,2}  [size = 3]
3326 $        v =  {4,5,6}  [size = 3]
3327 
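   As an illustrative (untested) sketch, the corresponding calls on process P0 of the example above
   could look like the following, where comm and B are placeholders and error checking is omitted:

.vb
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     MatCreate(comm,&B);
     MatSetSizes(B,2,PETSC_DECIDE,3,3);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
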
3328 .keywords: matrix, aij, compressed row, sparse, parallel
3329 
3330 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3331           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3332 @*/
3333 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3334 {
3335   PetscErrorCode ierr;
3336 
3337   PetscFunctionBegin;
3338   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3339   PetscFunctionReturn(0);
3340 }
3341 
3342 #undef __FUNCT__
3343 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3344 /*@C
3345    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3346    (the default parallel PETSc format).  For good matrix assembly performance
3347    the user should preallocate the matrix storage by setting the parameters
3348    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3349    performance can be increased by more than a factor of 50.
3350 
3351    Collective on MPI_Comm
3352 
3353    Input Parameters:
3354 +  B - the matrix
3355 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3356            (same value is used for all local rows)
3357 .  d_nnz - array containing the number of nonzeros in the various rows of the
3358            DIAGONAL portion of the local submatrix (possibly different for each row)
3359            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3360            The size of this array is equal to the number of local rows, i.e 'm'.
3361            For matrices that will be factored, you must leave room for (and set)
3362            the diagonal entry even if it is zero.
3363 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3364            submatrix (same value is used for all local rows).
3365 -  o_nnz - array containing the number of nonzeros in the various rows of the
3366            OFF-DIAGONAL portion of the local submatrix (possibly different for
3367            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3368            structure. The size of this array is equal to the number
3369            of local rows, i.e 'm'.
3370 
3371    If the *_nnz parameter is given then the *_nz parameter is ignored
3372 
3373    The AIJ format (also called the Yale sparse matrix format or
3374    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3375    storage.  The stored row and column indices begin with zero.
3376    See Users-Manual: ch_mat for details.
3377 
3378    The parallel matrix is partitioned such that the first m0 rows belong to
3379    process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
3381 
   as the submatrix obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an m x n matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3390    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3391 
3392    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3393 
3394    You can call MatGetInfo() to get information on how effective the preallocation was;
3395    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3396    You can also run with the option -info and look for messages with the string
3397    malloc in them to see if additional memory allocation was needed.
3398 
3399    Example usage:
3400 
3401    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3403    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3404    as follows:
3405 
3406 .vb
3407             1  2  0  |  0  3  0  |  0  4
3408     Proc0   0  5  6  |  7  0  0  |  8  0
3409             9  0 10  | 11  0  0  | 12  0
3410     -------------------------------------
3411            13  0 14  | 15 16 17  |  0  0
3412     Proc1   0 18  0  | 19 20 21  |  0  0
3413             0  0  0  | 22 23  0  | 24  0
3414     -------------------------------------
3415     Proc2  25 26 27  |  0  0 28  | 29  0
3416            30  0  0  | 31 32 33  |  0 34
3417 .ve
3418 
3419    This can be represented as a collection of submatrices as:
3420 
3421 .vb
3422       A B C
3423       D E F
3424       G H I
3425 .ve
3426 
3427    Where the submatrices A,B,C are owned by proc0, D,E,F are
3428    owned by proc1, G,H,I are owned by proc2.
3429 
3430    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3431    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3432    The 'M','N' parameters are 8,8, and have the same values on all procs.
3433 
3434    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3435    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3436    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3437    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3440 
3441    When d_nz, o_nz parameters are specified, d_nz storage elements are
3442    allocated for every row of the local diagonal submatrix, and o_nz
3443    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3446    In this case, the values of d_nz,o_nz are:
3447 .vb
3448      proc0 : dnz = 2, o_nz = 2
3449      proc1 : dnz = 3, o_nz = 2
3450      proc2 : dnz = 1, o_nz = 4
3451 .ve
3452    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3453    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
3455    34 values.
3456 
3457    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3459    In the above case the values for d_nnz,o_nnz are:
3460 .vb
3461      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3462      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3463      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3464 .ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is exact.
3467 
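   As a hedged illustration using the d_nnz/o_nnz values above, the corresponding calls on proc0
   might look like the following (comm and A are placeholders; error checking omitted):

.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     MatCreate(comm,&A);
     MatSetSizes(A,3,3,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
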
3468    Level: intermediate
3469 
3470 .keywords: matrix, aij, compressed row, sparse, parallel
3471 
3472 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3473           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3474 @*/
3475 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3476 {
3477   PetscErrorCode ierr;
3478 
3479   PetscFunctionBegin;
3480   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3481   PetscValidType(B,1);
3482   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3483   PetscFunctionReturn(0);
3484 }
3485 
3486 #undef __FUNCT__
3487 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3488 /*@
     MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
         in standard CSR format.
3491 
3492    Collective on MPI_Comm
3493 
3494    Input Parameters:
3495 +  comm - MPI communicator
3496 .  m - number of local rows (Cannot be PETSC_DECIDE)
3497 .  n - This value should be the same as the local size used in creating the
3498        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3499        calculated if N is given) For square matrices n is almost always m.
3500 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3501 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3502 .   i - row indices
3503 .   j - column indices
3504 -   a - matrix values
3505 
3506    Output Parameter:
3507 .   mat - the matrix
3508 
3509    Level: intermediate
3510 
3511    Notes:
3512        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3513      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3514      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3515 
3516        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3517 
       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix the expected input data is
    as shown below:
3521 
3522 $        1 0 0
3523 $        2 0 3     P0
3524 $       -------
3525 $        4 5 6     P1
3526 $
3527 $     Process0 [P0]: rows_owned=[0,1]
3528 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3529 $        j =  {0,0,2}  [size = 3]
3530 $        v =  {1,2,3}  [size = 3]
3531 $
3532 $     Process1 [P1]: rows_owned=[2]
3533 $        i =  {0,3}    [size = nrow+1  = 1+1]
3534 $        j =  {0,1,2}  [size = 3]
3535 $        v =  {4,5,6}  [size = 3]
3536 
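   A minimal (untested) sketch of the corresponding call on process P0 of the example above,
   where comm is a placeholder communicator and error checking is omitted:

.vb
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     Mat         A;
     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve
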
3537 .keywords: matrix, aij, compressed row, sparse, parallel
3538 
3539 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3540           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3541 @*/
3542 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3543 {
3544   PetscErrorCode ierr;
3545 
3546   PetscFunctionBegin;
3547   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3548   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3549   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3550   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3551   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3552   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3553   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3554   PetscFunctionReturn(0);
3555 }
3556 
3557 #undef __FUNCT__
3558 #define __FUNCT__ "MatCreateAIJ"
3559 /*@C
3560    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3561    (the default parallel PETSc format).  For good matrix assembly performance
3562    the user should preallocate the matrix storage by setting the parameters
3563    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3564    performance can be increased by more than a factor of 50.
3565 
3566    Collective on MPI_Comm
3567 
3568    Input Parameters:
3569 +  comm - MPI communicator
3570 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3571            This value should be the same as the local size used in creating the
3572            y vector for the matrix-vector product y = Ax.
3573 .  n - This value should be the same as the local size used in creating the
3574        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3575        calculated if N is given) For square matrices n is almost always m.
3576 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3577 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3578 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3579            (same value is used for all local rows)
3580 .  d_nnz - array containing the number of nonzeros in the various rows of the
3581            DIAGONAL portion of the local submatrix (possibly different for each row)
3582            or NULL, if d_nz is used to specify the nonzero structure.
3583            The size of this array is equal to the number of local rows, i.e 'm'.
3584 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3585            submatrix (same value is used for all local rows).
3586 -  o_nnz - array containing the number of nonzeros in the various rows of the
3587            OFF-DIAGONAL portion of the local submatrix (possibly different for
3588            each row) or NULL, if o_nz is used to specify the nonzero
3589            structure. The size of this array is equal to the number
3590            of local rows, i.e 'm'.
3591 
3592    Output Parameter:
3593 .  A - the matrix
3594 
3595    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3597    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3598 
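   As an illustrative sketch of that paradigm (m,n,M,N and the preallocation arguments are as
   described below; error checking omitted); calling both preallocation routines is harmless,
   since each is ignored when the matrix type does not match:

.vb
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetFromOptions(A);
     MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);
     MatSeqAIJSetPreallocation(A,d_nz,d_nnz);
.ve
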
3599    Notes:
3600    If the *_nnz parameter is given then the *_nz parameter is ignored
3601 
3602    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3603    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3604    storage requirements for this matrix.
3605 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
3608    that argument.
3609 
3610    The user MUST specify either the local or global matrix dimensions
3611    (possibly both).
3612 
3613    The parallel matrix is partitioned across processors such that the
3614    first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.
3618 
3619    The columns are logically partitioned with the n0 columns belonging
3620    to 0th partition, the next n1 columns belonging to the next
3621    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3622 
3623    The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the m rows and n columns
   owned by the given processor, i.e., the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
3629    illustrates this concept.
3630 
3631    For a square global matrix we define each processor's diagonal portion
3632    to be its local rows and the corresponding columns (a square submatrix);
3633    each processor's off-diagonal portion encompasses the remainder of the
3634    local matrix (a rectangular submatrix).
3635 
3636    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3637 
3638    When calling this routine with a single process communicator, a matrix of
3639    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3640    type of communicator, use the construction mechanism:
3641      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3642 
3643    By default, this format uses inodes (identical nodes) when possible.
3644    We search for consecutive rows with the same nonzero structure, thereby
3645    reusing matrix information to achieve increased efficiency.
3646 
3647    Options Database Keys:
3648 +  -mat_no_inode  - Do not use inodes
3649 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3650 -  -mat_aij_oneindex - Internally use indexing starting at 1
3651         rather than 0.  Note that when calling MatSetValues(),
3652         the user still MUST index entries starting at 0!
3653 
3654 
3655    Example usage:
3656 
3657    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3659    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3660    as follows:
3661 
3662 .vb
3663             1  2  0  |  0  3  0  |  0  4
3664     Proc0   0  5  6  |  7  0  0  |  8  0
3665             9  0 10  | 11  0  0  | 12  0
3666     -------------------------------------
3667            13  0 14  | 15 16 17  |  0  0
3668     Proc1   0 18  0  | 19 20 21  |  0  0
3669             0  0  0  | 22 23  0  | 24  0
3670     -------------------------------------
3671     Proc2  25 26 27  |  0  0 28  | 29  0
3672            30  0  0  | 31 32 33  |  0 34
3673 .ve
3674 
3675    This can be represented as a collection of submatrices as:
3676 
3677 .vb
3678       A B C
3679       D E F
3680       G H I
3681 .ve
3682 
3683    Where the submatrices A,B,C are owned by proc0, D,E,F are
3684    owned by proc1, G,H,I are owned by proc2.
3685 
3686    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3687    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3688    The 'M','N' parameters are 8,8, and have the same values on all procs.
3689 
3690    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3691    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3692    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3693    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3696 
3697    When d_nz, o_nz parameters are specified, d_nz storage elements are
3698    allocated for every row of the local diagonal submatrix, and o_nz
3699    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3702    In this case, the values of d_nz,o_nz are:
3703 .vb
3704      proc0 : dnz = 2, o_nz = 2
3705      proc1 : dnz = 3, o_nz = 2
3706      proc2 : dnz = 1, o_nz = 4
3707 .ve
3708    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3709    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
3711    34 values.
3712 
3713    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3715    In the above case the values for d_nnz,o_nnz are:
3716 .vb
3717      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3718      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3719      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3720 .ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is exact.
3723 
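   A hedged sketch of the direct call on proc0 with the d_nnz/o_nnz values above
   (comm and A are placeholders; error checking omitted):

.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     Mat      A;
     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
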
3724    Level: intermediate
3725 
3726 .keywords: matrix, aij, compressed row, sparse, parallel
3727 
3728 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3729           MPIAIJ, MatCreateMPIAIJWithArrays()
3730 @*/
3731 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3732 {
3733   PetscErrorCode ierr;
3734   PetscMPIInt    size;
3735 
3736   PetscFunctionBegin;
3737   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3738   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3739   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3740   if (size > 1) {
3741     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3742     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3743   } else {
3744     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3745     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3746   }
3747   PetscFunctionReturn(0);
3748 }
3749 
3750 #undef __FUNCT__
3751 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3752 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3753 {
3754   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3755   PetscBool      flg;
3756   PetscErrorCode ierr;
3757 
3758   PetscFunctionBegin;
3759   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
3760   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MPIAIJ matrix as input");
3761   if (Ad)     *Ad     = a->A;
3762   if (Ao)     *Ao     = a->B;
3763   if (colmap) *colmap = a->garray;
3764   PetscFunctionReturn(0);
3765 }
3766 
3767 #undef __FUNCT__
3768 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3769 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3770 {
3771   PetscErrorCode ierr;
3772   PetscInt       i;
3773   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3774 
3775   PetscFunctionBegin;
3776   if (coloring->ctype == IS_COLORING_GLOBAL) {
3777     ISColoringValue *allcolors,*colors;
3778     ISColoring      ocoloring;
3779 
3780     /* set coloring for diagonal portion */
3781     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3782 
3783     /* set coloring for off-diagonal portion */
3784     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
3785     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3786     for (i=0; i<a->B->cmap->n; i++) {
3787       colors[i] = allcolors[a->garray[i]];
3788     }
3789     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3790     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3791     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3792     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3793   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3794     ISColoringValue *colors;
3795     PetscInt        *larray;
3796     ISColoring      ocoloring;
3797 
3798     /* set coloring for diagonal portion */
3799     ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr);
3800     for (i=0; i<a->A->cmap->n; i++) {
3801       larray[i] = i + A->cmap->rstart;
3802     }
3803     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
3804     ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr);
3805     for (i=0; i<a->A->cmap->n; i++) {
3806       colors[i] = coloring->colors[larray[i]];
3807     }
3808     ierr = PetscFree(larray);CHKERRQ(ierr);
3809     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3810     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3811     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3812 
3813     /* set coloring for off-diagonal portion */
3814     ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr);
3815     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
3816     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3817     for (i=0; i<a->B->cmap->n; i++) {
3818       colors[i] = coloring->colors[larray[i]];
3819     }
3820     ierr = PetscFree(larray);CHKERRQ(ierr);
3821     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3822     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3823     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3824   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
3825   PetscFunctionReturn(0);
3826 }
3827 
3828 #undef __FUNCT__
3829 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3830 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3831 {
3832   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3833   PetscErrorCode ierr;
3834 
3835   PetscFunctionBegin;
3836   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3837   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3838   PetscFunctionReturn(0);
3839 }
3840 
3841 #undef __FUNCT__
3842 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
3843 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3844 {
3845   PetscErrorCode ierr;
3846   PetscInt       m,N,i,rstart,nnz,Ii;
3847   PetscInt       *indx;
3848   PetscScalar    *values;
3849 
3850   PetscFunctionBegin;
3851   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3852   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3853     PetscInt       *dnz,*onz,sum,bs,cbs;
3854 
3855     if (n == PETSC_DECIDE) {
3856       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3857     }
3858     /* Check sum(n) = N */
3859     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3860     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
3861 
3862     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3863     rstart -= m;
3864 
3865     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3866     for (i=0; i<m; i++) {
3867       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3868       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3869       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3870     }
3871 
3872     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3873     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3874     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3875     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3876     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3877     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3878     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3879   }
3880 
3881   /* numeric phase */
3882   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3883   for (i=0; i<m; i++) {
3884     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3885     Ii   = i + rstart;
3886     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3887     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3888   }
3889   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3890   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3891   PetscFunctionReturn(0);
3892 }
3893 
3894 #undef __FUNCT__
3895 #define __FUNCT__ "MatFileSplit"
3896 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3897 {
3898   PetscErrorCode    ierr;
3899   PetscMPIInt       rank;
3900   PetscInt          m,N,i,rstart,nnz;
3901   size_t            len;
3902   const PetscInt    *indx;
3903   PetscViewer       out;
3904   char              *name;
3905   Mat               B;
3906   const PetscScalar *values;
3907 
3908   PetscFunctionBegin;
3909   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3910   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3911   /* Should this be the type of the diagonal block of A? */
3912   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3913   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3914   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3915   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3916   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3917   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3918   for (i=0; i<m; i++) {
3919     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3920     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3921     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3922   }
3923   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3924   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3925 
3926   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3927   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3928   ierr = PetscMalloc1(len+16,&name);CHKERRQ(ierr);
3929   ierr = PetscSNPrintf(name,len+16,"%s.%d",outfile,rank);CHKERRQ(ierr);
3930   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3931   ierr = PetscFree(name);CHKERRQ(ierr);
3932   ierr = MatView(B,out);CHKERRQ(ierr);
3933   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3934   ierr = MatDestroy(&B);CHKERRQ(ierr);
3935   PetscFunctionReturn(0);
3936 }
3937 
3938 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3939 #undef __FUNCT__
3940 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3941 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3942 {
3943   PetscErrorCode      ierr;
3944   Mat_Merge_SeqsToMPI *merge;
3945   PetscContainer      container;
3946 
3947   PetscFunctionBegin;
3948   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3949   if (container) {
3950     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3951     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3952     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3953     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3954     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3955     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3956     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3957     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3958     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3959     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3960     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3961     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3962     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3963     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3964     ierr = PetscFree(merge);CHKERRQ(ierr);
3965     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3966   }
3967   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3968   PetscFunctionReturn(0);
3969 }
3970 
3971 #include <../src/mat/utils/freespace.h>
3972 #include <petscbt.h>
3973 
3974 #undef __FUNCT__
3975 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
3976 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
3977 {
3978   PetscErrorCode      ierr;
3979   MPI_Comm            comm;
3980   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
3981   PetscMPIInt         size,rank,taga,*len_s;
3982   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
3983   PetscInt            proc,m;
3984   PetscInt            **buf_ri,**buf_rj;
3985   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3986   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
3987   MPI_Request         *s_waits,*r_waits;
3988   MPI_Status          *status;
3989   MatScalar           *aa=a->a;
3990   MatScalar           **abuf_r,*ba_i;
3991   Mat_Merge_SeqsToMPI *merge;
3992   PetscContainer      container;
3993 
3994   PetscFunctionBegin;
3995   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
3996   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3997 
3998   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3999   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4000 
4001   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4002   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4003 
4004   bi     = merge->bi;
4005   bj     = merge->bj;
4006   buf_ri = merge->buf_ri;
4007   buf_rj = merge->buf_rj;
4008 
4009   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4010   owners = merge->rowmap->range;
4011   len_s  = merge->len_s;
4012 
4013   /* send and recv matrix values */
4014   /*-----------------------------*/
4015   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4016   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4017 
4018   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4019   for (proc=0,k=0; proc<size; proc++) {
4020     if (!len_s[proc]) continue;
4021     i    = owners[proc];
4022     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4023     k++;
4024   }
4025 
4026   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4027   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4028   ierr = PetscFree(status);CHKERRQ(ierr);
4029 
4030   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4031   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4032 
4033   /* insert mat values of mpimat */
4034   /*----------------------------*/
4035   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4036   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4037 
4038   for (k=0; k<merge->nrecv; k++) {
4039     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4040     nrows       = *(buf_ri_k[k]);
4041     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4042     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4043   }
4044 
4045   /* set values of ba */
4046   m = merge->rowmap->n;
4047   for (i=0; i<m; i++) {
4048     arow = owners[rank] + i;
4049     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4050     bnzi = bi[i+1] - bi[i];
4051     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4052 
4053     /* add local non-zero vals of this proc's seqmat into ba */
4054     anzi   = ai[arow+1] - ai[arow];
4055     aj     = a->j + ai[arow];
4056     aa     = a->a + ai[arow];
4057     nextaj = 0;
4058     for (j=0; nextaj<anzi; j++) {
4059       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4060         ba_i[j] += aa[nextaj++];
4061       }
4062     }
4063 
4064     /* add received vals into ba */
4065     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4066       /* i-th row */
4067       if (i == *nextrow[k]) {
4068         anzi   = *(nextai[k]+1) - *nextai[k];
4069         aj     = buf_rj[k] + *(nextai[k]);
4070         aa     = abuf_r[k] + *(nextai[k]);
4071         nextaj = 0;
4072         for (j=0; nextaj<anzi; j++) {
4073           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4074             ba_i[j] += aa[nextaj++];
4075           }
4076         }
4077         nextrow[k]++; nextai[k]++;
4078       }
4079     }
4080     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4081   }
4082   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4083   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4084 
4085   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4086   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4087   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4088   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4089   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4090   PetscFunctionReturn(0);
4091 }
4092 
4093 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4094 
4095 #undef __FUNCT__
4096 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4097 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4098 {
4099   PetscErrorCode      ierr;
4100   Mat                 B_mpi;
4101   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4102   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4103   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4104   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4105   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4106   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4107   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4108   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4109   MPI_Status          *status;
4110   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4111   PetscBT             lnkbt;
4112   Mat_Merge_SeqsToMPI *merge;
4113   PetscContainer      container;
4114 
4115   PetscFunctionBegin;
4116   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4117 
4118   /* make sure it is a PETSc comm */
4119   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4120   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4121   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4122 
4123   ierr = PetscNew(&merge);CHKERRQ(ierr);
4124   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4125 
4126   /* determine row ownership */
4127   /*---------------------------------------------------------*/
4128   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4129   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4130   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4131   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4132   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4133   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4134   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4135 
4136   m      = merge->rowmap->n;
4137   owners = merge->rowmap->range;
4138 
4139   /* determine the number of messages to send, their lengths */
4140   /*---------------------------------------------------------*/
4141   len_s = merge->len_s;
4142 
4143   len          = 0; /* length of buf_si[] */
4144   merge->nsend = 0;
4145   for (proc=0; proc<size; proc++) {
4146     len_si[proc] = 0;
4147     if (proc == rank) {
4148       len_s[proc] = 0;
4149     } else {
4150       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4151       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4152     }
4153     if (len_s[proc]) {
4154       merge->nsend++;
4155       nrows = 0;
4156       for (i=owners[proc]; i<owners[proc+1]; i++) {
4157         if (ai[i+1] > ai[i]) nrows++;
4158       }
4159       len_si[proc] = 2*(nrows+1);
4160       len         += len_si[proc];
4161     }
4162   }
4163 
4164   /* determine the number and length of messages to receive for ij-structure */
4165   /*-------------------------------------------------------------------------*/
4166   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4167   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4168 
4169   /* post the Irecv of j-structure */
4170   /*-------------------------------*/
4171   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4172   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4173 
4174   /* post the Isend of j-structure */
4175   /*--------------------------------*/
4176   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4177 
4178   for (proc=0, k=0; proc<size; proc++) {
4179     if (!len_s[proc]) continue;
4180     i    = owners[proc];
4181     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4182     k++;
4183   }
4184 
4185   /* receives and sends of j-structure are complete */
4186   /*------------------------------------------------*/
4187   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4188   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4189 
4190   /* send and recv i-structure */
4191   /*---------------------------*/
4192   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4193   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4194 
4195   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4196   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4197   for (proc=0,k=0; proc<size; proc++) {
4198     if (!len_s[proc]) continue;
4199     /* form outgoing message for i-structure:
4200          buf_si[0]:                 nrows to be sent
4201                [1:nrows]:           row index (local to the destination process)
4202                [nrows+1:2*nrows+1]: i-structure index
4203     */
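    /* hypothetical illustration: sending two nonempty rows with 3 and 5 nonzeros to a process that owns
       global rows 10..19, as its local rows 2 and 7, gives buf_si = {2, 2, 7, 0, 3, 8}, i.e.
       len_si[proc] = 2*(nrows+1) = 6 */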
4204     /*-------------------------------------------*/
4205     nrows       = len_si[proc]/2 - 1;
4206     buf_si_i    = buf_si + nrows+1;
4207     buf_si[0]   = nrows;
4208     buf_si_i[0] = 0;
4209     nrows       = 0;
4210     for (i=owners[proc]; i<owners[proc+1]; i++) {
4211       anzi = ai[i+1] - ai[i];
4212       if (anzi) {
4213         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4214         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4215         nrows++;
4216       }
4217     }
4218     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4219     k++;
4220     buf_si += len_si[proc];
4221   }
4222 
4223   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4224   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4225 
4226   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4227   for (i=0; i<merge->nrecv; i++) {
4228     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4229   }
4230 
4231   ierr = PetscFree(len_si);CHKERRQ(ierr);
4232   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4233   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4234   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4235   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4236   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4237   ierr = PetscFree(status);CHKERRQ(ierr);
4238 
4239   /* compute a local seq matrix in each processor */
4240   /*----------------------------------------------*/
4241   /* allocate bi array and free space for accumulating nonzero column info */
4242   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4243   bi[0] = 0;
4244 
4245   /* create and initialize a linked list */
4246   nlnk = N+1;
4247   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4248 
4249   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4250   len  = ai[owners[rank+1]] - ai[owners[rank]];
4251   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4252 
4253   current_space = free_space;
4254 
4255   /* determine symbolic info for each local row */
4256   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4257 
4258   for (k=0; k<merge->nrecv; k++) {
4259     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4260     nrows       = *buf_ri_k[k];
4261     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4262     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4263   }
4264 
4265   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4266   len  = 0;
4267   for (i=0; i<m; i++) {
4268     bnzi = 0;
4269     /* add local non-zero cols of this proc's seqmat into lnk */
4270     arow  = owners[rank] + i;
4271     anzi  = ai[arow+1] - ai[arow];
4272     aj    = a->j + ai[arow];
4273     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4274     bnzi += nlnk;
4275     /* add received col data into lnk */
4276     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4277       if (i == *nextrow[k]) { /* i-th row */
4278         anzi  = *(nextai[k]+1) - *nextai[k];
4279         aj    = buf_rj[k] + *nextai[k];
4280         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4281         bnzi += nlnk;
4282         nextrow[k]++; nextai[k]++;
4283       }
4284     }
4285     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4286 
4287     /* if free space is not available, make more free space */
4288     if (current_space->local_remaining<bnzi) {
4289       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4290       nspacedouble++;
4291     }
4292     /* copy data into free space, then initialize lnk */
4293     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4294     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4295 
4296     current_space->array           += bnzi;
4297     current_space->local_used      += bnzi;
4298     current_space->local_remaining -= bnzi;
4299 
4300     bi[i+1] = bi[i] + bnzi;
4301   }
4302 
4303   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4304 
4305   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4306   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4307   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4308 
4309   /* create symbolic parallel matrix B_mpi */
4310   /*---------------------------------------*/
4311   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4312   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4313   if (n==PETSC_DECIDE) {
4314     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4315   } else {
4316     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4317   }
4318   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4319   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4320   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4321   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4322   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4323 
4324   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4325   B_mpi->assembled    = PETSC_FALSE;
4326   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4327   merge->bi           = bi;
4328   merge->bj           = bj;
4329   merge->buf_ri       = buf_ri;
4330   merge->buf_rj       = buf_rj;
4331   merge->coi          = NULL;
4332   merge->coj          = NULL;
4333   merge->owners_co    = NULL;
4334 
4335   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4336 
4337   /* attach the supporting struct to B_mpi for reuse */
4338   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4339   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4340   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4341   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4342   *mpimat = B_mpi;
4343 
4344   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4345   PetscFunctionReturn(0);
4346 }
4347 
4348 #undef __FUNCT__
4349 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4350 /*@C
4351       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential
4352                  matrices from each process
4353 
4354     Collective on MPI_Comm
4355 
4356    Input Parameters:
4357 +    comm - the communicator the parallel matrix will live on
4358 .    seqmat - the input sequential matrix
4359 .    m - number of local rows (or PETSC_DECIDE)
4360 .    n - number of local columns (or PETSC_DECIDE)
4361 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4362 
4363    Output Parameter:
4364 .    mpimat - the parallel matrix generated
4365 
4366     Level: advanced
4367 
4368    Notes:
4369      The dimensions of the sequential matrix on each process MUST be the same.
4370      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4371      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
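
     Example usage (a minimal sketch; Aseq is an assumed, already assembled SeqAIJ matrix of identical
     global size on every process, and ierr is a PetscErrorCode):
.vb
     Mat Ampi;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);CHKERRQ(ierr);
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&Ampi);CHKERRQ(ierr);
.ve
     The second call reuses the symbolic data attached to Ampi and only redoes the numeric sum, which is
     appropriate after Aseq has been refilled with new values on the same nonzero pattern.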
4372 @*/
4373 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4374 {
4375   PetscErrorCode ierr;
4376   PetscMPIInt    size;
4377 
4378   PetscFunctionBegin;
4379   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4380   if (size == 1) {
4381     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4382     if (scall == MAT_INITIAL_MATRIX) {
4383       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4384     } else {
4385       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4386     }
4387     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4388     PetscFunctionReturn(0);
4389   }
4390   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4391   if (scall == MAT_INITIAL_MATRIX) {
4392     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4393   }
4394   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4395   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4396   PetscFunctionReturn(0);
4397 }
4398 
4399 #undef __FUNCT__
4400 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4401 /*@
4402      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4403           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4404           with MatGetSize()
4405 
4406     Not Collective
4407 
4408    Input Parameters:
4409 +    A - the matrix
4410 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4411 
4412    Output Parameter:
4413 .    A_loc - the local sequential matrix generated
4414 
4415     Level: developer
4416 
4417 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4418 
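   Example usage (a minimal sketch; A is assumed to be an assembled MPIAIJ matrix and ierr a PetscErrorCode):
.vb
   Mat A_loc;
   ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
   ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
   ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
   The MAT_REUSE_MATRIX call refreshes the values of A_loc after the entries of A have changed; the
   caller destroys A_loc when it is no longer needed.
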
4419 @*/
4420 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4421 {
4422   PetscErrorCode ierr;
4423   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4424   Mat_SeqAIJ     *mat,*a,*b;
4425   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4426   MatScalar      *aa,*ba,*cam;
4427   PetscScalar    *ca;
4428   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4429   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4430   PetscBool      match;
4431   MPI_Comm       comm;
4432   PetscMPIInt    size;
4433 
4434   PetscFunctionBegin;
4435   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4436   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4437   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4438   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4439   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4440 
4441   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4442   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4443   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4444   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4445   aa = a->a; ba = b->a;
4446   if (scall == MAT_INITIAL_MATRIX) {
4447     if (size == 1) {
4448       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4449       PetscFunctionReturn(0);
4450     }
4451 
4452     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4453     ci[0] = 0;
4454     for (i=0; i<am; i++) {
4455       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4456     }
4457     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4458     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4459     k    = 0;
4460     for (i=0; i<am; i++) {
4461       ncols_o = bi[i+1] - bi[i];
4462       ncols_d = ai[i+1] - ai[i];
4463       /* off-diagonal portion of A */
4464       for (jo=0; jo<ncols_o; jo++) {
4465         col = cmap[*bj];
4466         if (col >= cstart) break;
4467         cj[k]   = col; bj++;
4468         ca[k++] = *ba++;
4469       }
4470       /* diagonal portion of A */
4471       for (j=0; j<ncols_d; j++) {
4472         cj[k]   = cstart + *aj++;
4473         ca[k++] = *aa++;
4474       }
4475       /* off-diagonal portion of A */
4476       for (j=jo; j<ncols_o; j++) {
4477         cj[k]   = cmap[*bj++];
4478         ca[k++] = *ba++;
4479       }
4480     }
4481     /* put together the new matrix */
4482     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4483     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4484     /* Since these are PETSc arrays, change flags to free them as necessary. */
4485     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4486     mat->free_a  = PETSC_TRUE;
4487     mat->free_ij = PETSC_TRUE;
4488     mat->nonew   = 0;
4489   } else if (scall == MAT_REUSE_MATRIX) {
4490     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4491     ci = mat->i; cj = mat->j; cam = mat->a;
4492     for (i=0; i<am; i++) {
4493       /* off-diagonal portion of A */
4494       ncols_o = bi[i+1] - bi[i];
4495       for (jo=0; jo<ncols_o; jo++) {
4496         col = cmap[*bj];
4497         if (col >= cstart) break;
4498         *cam++ = *ba++; bj++;
4499       }
4500       /* diagonal portion of A */
4501       ncols_d = ai[i+1] - ai[i];
4502       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4503       /* off-diagonal portion of A */
4504       for (j=jo; j<ncols_o; j++) {
4505         *cam++ = *ba++; bj++;
4506       }
4507     }
4508   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4509   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4510   PetscFunctionReturn(0);
4511 }
4512 
4513 #undef __FUNCT__
4514 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4515 /*@C
4516      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4517 
4518     Not Collective
4519 
4520    Input Parameters:
4521 +    A - the matrix
4522 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4523 -    row, col - index sets of rows and columns to extract (or NULL)
4524 
4525    Output Parameter:
4526 .    A_loc - the local sequential matrix generated
4527 
4528     Level: developer
4529 
4530 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4531 
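   Example usage (a minimal sketch; A is assumed to be an assembled MPIAIJ matrix; passing NULL for row
   and col selects all local rows and all locally nonzero columns):
.vb
   Mat A_loc;
   ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
   ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
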
4532 @*/
4533 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4534 {
4535   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4536   PetscErrorCode ierr;
4537   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4538   IS             isrowa,iscola;
4539   Mat            *aloc;
4540   PetscBool      match;
4541 
4542   PetscFunctionBegin;
4543   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4544   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4545   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4546   if (!row) {
4547     start = A->rmap->rstart; end = A->rmap->rend;
4548     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4549   } else {
4550     isrowa = *row;
4551   }
4552   if (!col) {
4553     start = A->cmap->rstart;
4554     cmap  = a->garray;
4555     nzA   = a->A->cmap->n;
4556     nzB   = a->B->cmap->n;
4557     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4558     ncols = 0;
4559     for (i=0; i<nzB; i++) {
4560       if (cmap[i] < start) idx[ncols++] = cmap[i];
4561       else break;
4562     }
4563     imark = i;
4564     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4565     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4566     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4567   } else {
4568     iscola = *col;
4569   }
4570   if (scall != MAT_INITIAL_MATRIX) {
4571     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4572     aloc[0] = *A_loc;
4573   }
4574   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4575   *A_loc = aloc[0];
4576   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4577   if (!row) {
4578     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4579   }
4580   if (!col) {
4581     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4582   }
4583   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4584   PetscFunctionReturn(0);
4585 }
4586 
4587 #undef __FUNCT__
4588 #define __FUNCT__ "MatGetBrowsOfAcols"
4589 /*@C
4590     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4591 
4592     Collective on Mat
4593 
4594    Input Parameters:
4595 +    A,B - the matrices in mpiaij format
4596 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4597 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4598 
4599    Output Parameters:
4600 +    rowb, colb - index sets of rows and columns of B to extract
4601 -    B_seq - the sequential matrix generated
4602 
4603     Level: developer
4604 
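   Example usage (a minimal sketch; A and B are assumed to be assembled MPIAIJ matrices with compatible layouts):
.vb
   IS  rowb = NULL,colb = NULL;
   Mat B_seq;
   ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
   ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
   ierr = ISDestroy(&rowb);CHKERRQ(ierr);
   ierr = ISDestroy(&colb);CHKERRQ(ierr);
   ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve
   The index sets created by the first call are passed back to the MAT_REUSE_MATRIX call, which only
   updates the numerical values of B_seq.
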
4605 @*/
4606 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4607 {
4608   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4609   PetscErrorCode ierr;
4610   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4611   IS             isrowb,iscolb;
4612   Mat            *bseq=NULL;
4613 
4614   PetscFunctionBegin;
4615   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4616     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4617   }
4618   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4619 
4620   if (scall == MAT_INITIAL_MATRIX) {
4621     start = A->cmap->rstart;
4622     cmap  = a->garray;
4623     nzA   = a->A->cmap->n;
4624     nzB   = a->B->cmap->n;
4625     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4626     ncols = 0;
4627     for (i=0; i<nzB; i++) {  /* row < local row index */
4628       if (cmap[i] < start) idx[ncols++] = cmap[i];
4629       else break;
4630     }
4631     imark = i;
4632     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4633     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4634     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4635     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4636   } else {
4637     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4638     isrowb  = *rowb; iscolb = *colb;
4639     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4640     bseq[0] = *B_seq;
4641   }
4642   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4643   *B_seq = bseq[0];
4644   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4645   if (!rowb) {
4646     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4647   } else {
4648     *rowb = isrowb;
4649   }
4650   if (!colb) {
4651     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4652   } else {
4653     *colb = iscolb;
4654   }
4655   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4656   PetscFunctionReturn(0);
4657 }
4658 
4659 #undef __FUNCT__
4660 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4661 /*
4662     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4663     of the OFF-DIAGONAL portion of local A
4664 
4665     Collective on Mat
4666 
4667    Input Parameters:
4668 +    A,B - the matrices in mpiaij format
4669 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4670 
4671    Output Parameters:
4672 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4673 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4674 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4675 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4676 
4677     Level: developer
4678 
4679 */
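
/*
   A minimal sketch of the intended call sequence (variable names are illustrative); the arrays returned
   by the MAT_INITIAL_MATRIX call are handed back unchanged so that the MAT_REUSE_MATRIX call can skip
   the symbolic communication setup:

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
*/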
4680 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4681 {
4682   VecScatter_MPI_General *gen_to,*gen_from;
4683   PetscErrorCode         ierr;
4684   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4685   Mat_SeqAIJ             *b_oth;
4686   VecScatter             ctx =a->Mvctx;
4687   MPI_Comm               comm;
4688   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4689   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4690   PetscScalar            *rvalues,*svalues;
4691   MatScalar              *b_otha,*bufa,*bufA;
4692   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4693   MPI_Request            *rwaits = NULL,*swaits = NULL;
4694   MPI_Status             *sstatus,rstatus;
4695   PetscMPIInt            jj,size;
4696   PetscInt               *cols,sbs,rbs;
4697   PetscScalar            *vals;
4698 
4699   PetscFunctionBegin;
4700   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4701   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4702 
4703   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4704     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4705   }
4706   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4707   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4708 
4709   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4710   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4711   rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
4712   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
4713   nrecvs   = gen_from->n;
4714   nsends   = gen_to->n;
4715 
4716   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4717   srow    = gen_to->indices;    /* local row index to be sent */
4718   sstarts = gen_to->starts;
4719   sprocs  = gen_to->procs;
4720   sstatus = gen_to->sstatus;
4721   sbs     = gen_to->bs;
4722   rstarts = gen_from->starts;
4723   rprocs  = gen_from->procs;
4724   rbs     = gen_from->bs;
4725 
4726   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4727   if (scall == MAT_INITIAL_MATRIX) {
4728     /* i-array */
4729     /*---------*/
4730     /*  post receives */
4731     for (i=0; i<nrecvs; i++) {
4732       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4733       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4734       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4735     }
4736 
4737     /* pack the outgoing message */
4738     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4739 
4740     sstartsj[0] = 0;
4741     rstartsj[0] = 0;
4742     len         = 0; /* total length of j or a array to be sent */
4743     k           = 0;
4744     for (i=0; i<nsends; i++) {
4745       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4746       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4747       for (j=0; j<nrows; j++) {
4748         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4749         for (l=0; l<sbs; l++) {
4750           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4751 
4752           rowlen[j*sbs+l] = ncols;
4753 
4754           len += ncols;
4755           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4756         }
4757         k++;
4758       }
4759       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4760 
4761       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4762     }
4763     /* recvs and sends of i-array are completed */
4764     i = nrecvs;
4765     while (i--) {
4766       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4767     }
4768     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4769 
4770     /* allocate buffers for sending j and a arrays */
4771     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4772     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4773 
4774     /* create i-array of B_oth */
4775     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4776 
4777     b_othi[0] = 0;
4778     len       = 0; /* total length of j or a array to be received */
4779     k         = 0;
4780     for (i=0; i<nrecvs; i++) {
4781       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4782       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4783       for (j=0; j<nrows; j++) {
4784         b_othi[k+1] = b_othi[k] + rowlen[j];
4785         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
4786         k++;
4787       }
4788       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4789     }
4790 
4791     /* allocate space for j and a arrays of B_oth */
4792     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4793     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4794 
4795     /* j-array */
4796     /*---------*/
4797     /*  post receives of j-array */
4798     for (i=0; i<nrecvs; i++) {
4799       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4800       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4801     }
4802 
4803     /* pack the outgoing message j-array */
4804     k = 0;
4805     for (i=0; i<nsends; i++) {
4806       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4807       bufJ  = bufj+sstartsj[i];
4808       for (j=0; j<nrows; j++) {
4809         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4810         for (ll=0; ll<sbs; ll++) {
4811           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4812           for (l=0; l<ncols; l++) {
4813             *bufJ++ = cols[l];
4814           }
4815           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4816         }
4817       }
4818       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4819     }
4820 
4821     /* recvs and sends of j-array are completed */
4822     i = nrecvs;
4823     while (i--) {
4824       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4825     }
4826     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4827   } else if (scall == MAT_REUSE_MATRIX) {
4828     sstartsj = *startsj_s;
4829     rstartsj = *startsj_r;
4830     bufa     = *bufa_ptr;
4831     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4832     b_otha   = b_oth->a;
4833   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
4834 
4835   /* a-array */
4836   /*---------*/
4837   /*  post receives of a-array */
4838   for (i=0; i<nrecvs; i++) {
4839     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4840     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4841   }
4842 
4843   /* pack the outgoing message a-array */
4844   k = 0;
4845   for (i=0; i<nsends; i++) {
4846     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4847     bufA  = bufa+sstartsj[i];
4848     for (j=0; j<nrows; j++) {
4849       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4850       for (ll=0; ll<sbs; ll++) {
4851         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4852         for (l=0; l<ncols; l++) {
4853           *bufA++ = vals[l];
4854         }
4855         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4856       }
4857     }
4858     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4859   }
4860   /* recvs and sends of a-array are completed */
4861   i = nrecvs;
4862   while (i--) {
4863     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4864   }
4865   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4866   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4867 
4868   if (scall == MAT_INITIAL_MATRIX) {
4869     /* put together the new matrix */
4870     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4871 
4872     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4873     /* Since these are PETSc arrays, change flags to free them as necessary. */
4874     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4875     b_oth->free_a  = PETSC_TRUE;
4876     b_oth->free_ij = PETSC_TRUE;
4877     b_oth->nonew   = 0;
4878 
4879     ierr = PetscFree(bufj);CHKERRQ(ierr);
4880     if (!startsj_s || !bufa_ptr) {
4881       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4882       ierr = PetscFree(bufa);CHKERRQ(ierr);
4883     } else {
4884       *startsj_s = sstartsj;
4885       *startsj_r = rstartsj;
4886       *bufa_ptr  = bufa;
4887     }
4888   }
4889   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4890   PetscFunctionReturn(0);
4891 }
4892 
4893 #undef __FUNCT__
4894 #define __FUNCT__ "MatGetCommunicationStructs"
4895 /*@C
4896   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4897 
4898   Not Collective
4899 
4900   Input Parameters:
4901 . A - The matrix in mpiaij format
4902 
4903   Output Parameters:
4904 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4905 . colmap - A map from global column index to local index into lvec
4906 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4907 
4908   Level: developer
4909 
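  Example usage (a minimal sketch; A is assumed to be an assembled MPIAIJ matrix; the PetscInt form of
  colmap applies when PETSC_USE_CTABLE is not defined, otherwise colmap is a PetscTable):
.vb
  Vec        lvec;
  PetscInt   *colmap;
  VecScatter Mvctx;
  ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
  The returned objects are owned by A and must not be destroyed by the caller.
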
4910 @*/
4911 #if defined(PETSC_USE_CTABLE)
4912 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4913 #else
4914 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4915 #endif
4916 {
4917   Mat_MPIAIJ *a;
4918 
4919   PetscFunctionBegin;
4920   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4921   PetscValidPointer(lvec, 2);
4922   PetscValidPointer(colmap, 3);
4923   PetscValidPointer(multScatter, 4);
4924   a = (Mat_MPIAIJ*) A->data;
4925   if (lvec) *lvec = a->lvec;
4926   if (colmap) *colmap = a->colmap;
4927   if (multScatter) *multScatter = a->Mvctx;
4928   PetscFunctionReturn(0);
4929 }
4930 
4931 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4932 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4933 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4934 #if defined(PETSC_HAVE_ELEMENTAL)
4935 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4936 #endif
4937 
4938 #undef __FUNCT__
4939 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4940 /*
4941     Computes C = A*B as (B'*A')' since forming the dense-times-sparse product A*B directly is untenable
4942 
4943                n                       p                          p
4944         (              )       (              )         (                  )
4945       m (      A       )  *  n (       B      )   =   m (         C        )
4946         (              )       (              )         (                  )
4947 
4948 */
4949 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4950 {
4951   PetscErrorCode ierr;
4952   Mat            At,Bt,Ct;
4953 
4954   PetscFunctionBegin;
4955   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4956   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4957   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4958   ierr = MatDestroy(&At);CHKERRQ(ierr);
4959   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4960   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4961   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4962   PetscFunctionReturn(0);
4963 }
4964 
4965 #undef __FUNCT__
4966 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4967 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4968 {
4969   PetscErrorCode ierr;
4970   PetscInt       m=A->rmap->n,n=B->cmap->n;
4971   Mat            Cmat;
4972 
4973   PetscFunctionBegin;
4974   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4975   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
4976   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4977   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
4978   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4979   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
4980   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4981   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4982 
4983   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
4984 
4985   *C = Cmat;
4986   PetscFunctionReturn(0);
4987 }
4988 
4989 /* ----------------------------------------------------------------*/
4990 #undef __FUNCT__
4991 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4992 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4993 {
4994   PetscErrorCode ierr;
4995 
4996   PetscFunctionBegin;
4997   if (scall == MAT_INITIAL_MATRIX) {
4998     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4999     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5000     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5001   }
5002   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5003   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5004   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5005   PetscFunctionReturn(0);
5006 }
5007 
5008 /*MC
5009    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5010 
5011    Options Database Keys:
5012 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5013 
5014   Level: beginner
5015 
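  Example usage (a minimal sketch of selecting this type explicitly; A, M, N, and the preallocation
  figures are illustrative):
.vb
  ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
.ve
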
5016 .seealso: MatCreateAIJ()
5017 M*/
5018 
5019 #undef __FUNCT__
5020 #define __FUNCT__ "MatCreate_MPIAIJ"
5021 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5022 {
5023   Mat_MPIAIJ     *b;
5024   PetscErrorCode ierr;
5025   PetscMPIInt    size;
5026 
5027   PetscFunctionBegin;
5028   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5029 
5030   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5031   B->data       = (void*)b;
5032   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5033   B->assembled  = PETSC_FALSE;
5034   B->insertmode = NOT_SET_VALUES;
5035   b->size       = size;
5036 
5037   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5038 
5039   /* build cache for off-process entries generated during assembly */
5040   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5041 
5042   b->donotstash  = PETSC_FALSE;
5043   b->colmap      = 0;
5044   b->garray      = 0;
5045   b->roworiented = PETSC_TRUE;
5046 
5047   /* stuff used for matrix vector multiply */
5048   b->lvec  = NULL;
5049   b->Mvctx = NULL;
5050 
5051   /* stuff for MatGetRow() */
5052   b->rowindices   = 0;
5053   b->rowvalues    = 0;
5054   b->getrowactive = PETSC_FALSE;
5055 
5056   /* flexible pointer used in CUSP/CUSPARSE classes */
5057   b->spptr = NULL;
5058 
5059   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5060   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5061   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5062   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5063   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5064   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5065   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5066   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5067   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5068   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5069   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5070 #if defined(PETSC_HAVE_ELEMENTAL)
5071   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5072 #endif
5073   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5074   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5075   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5076   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5077   PetscFunctionReturn(0);
5078 }
5079 
5080 #undef __FUNCT__
5081 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5082 /*@C
5083      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5084          and "off-diagonal" part of the matrix in CSR format.
5085 
5086    Collective on MPI_Comm
5087 
5088    Input Parameters:
5089 +  comm - MPI communicator
5090 .  m - number of local rows (Cannot be PETSC_DECIDE)
5091 .  n - This value should be the same as the local size used in creating the
5092        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5093        calculated if N is given). For square matrices n is almost always m.
5094 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5095 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5096 .   i - row indices for "diagonal" portion of matrix
5097 .   j - column indices
5098 .   a - matrix values
5099 .   oi - row indices for "off-diagonal" portion of matrix
5100 .   oj - column indices
5101 -   oa - matrix values
5102 
5103    Output Parameter:
5104 .   mat - the matrix
5105 
5106    Level: advanced
5107 
5108    Notes:
5109        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5110        must free the arrays once the matrix has been destroyed and not before.
5111 
5112        The i and j indices are 0 based
5113 
5114        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5115 
5116        This sets local rows and cannot be used to set off-processor values.
5117 
5118        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5119        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5120        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5121        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5122        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5123        communication if it is known that only local entries will be set.
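
       Example usage (a minimal sketch only; the two-process layout, sparsity pattern, and numerical values are
       purely illustrative). Consider a 4x4 matrix distributed over two processes, each owning 2 rows and 2
       columns. As implied by the sequential matrices created inside this routine (n columns for the "diagonal"
       block, N columns for the "off-diagonal" block), j holds local column indices while oj holds global
       column indices. On rank 0 one could write
.vb
       PetscInt    i[]  = {0,2,3};          /* diagonal block: row 0 has 2 entries, row 1 has 1 entry   */
       PetscInt    j[]  = {0,1,1};          /* local column indices inside the diagonal block           */
       PetscScalar a[]  = {1.0,2.0,3.0};
       PetscInt    oi[] = {0,1,2};          /* off-diagonal block: one entry in each local row          */
       PetscInt    oj[] = {2,3};            /* global column indices owned by the other process         */
       PetscScalar oa[] = {4.0,5.0};
       Mat         A;

       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,
                                             i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
       The six arrays must remain valid until A has been destroyed.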
5124 
5125 .keywords: matrix, aij, compressed row, sparse, parallel
5126 
5127 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5128           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5129 @*/
5130 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5131 {
5132   PetscErrorCode ierr;
5133   Mat_MPIAIJ     *maij;
5134 
5135   PetscFunctionBegin;
5136   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5137   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5138   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5139   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5140   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5141   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5142   maij = (Mat_MPIAIJ*) (*mat)->data;
5143 
5144   (*mat)->preallocated = PETSC_TRUE;
5145 
5146   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5147   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5148 
5149   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5150   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5151 
5152   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5153   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5154   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5155   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5156 
5157   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5158   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5159   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5160   PetscFunctionReturn(0);
5161 }
5162 
5163 /*
5164     Special version of MatSetValues() for MATMPIAIJ matrices that can be called directly from Fortran
5165 */
5166 #include <petsc/private/fortranimpl.h>
5167 
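/* Select the Fortran symbol name according to the compiler's name-mangling convention: all caps, a plain
   lowercase name with no trailing underscore, or (otherwise) the lowercase name with a single trailing underscore */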
5168 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5169 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5170 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5171 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5172 #endif
5173 
5174 /* Change these macros so they can be used in this void function: no error code can be returned, so errors abort via CHKERRABORT() instead */
5175 #undef CHKERRQ
5176 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5177 #undef SETERRQ2
5178 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5179 #undef SETERRQ3
5180 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5181 #undef SETERRQ
5182 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5183 
5184 #undef __FUNCT__
5185 #define __FUNCT__ "matsetvaluesmpiaij_"
5186 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5187 {
5188   Mat            mat  = *mmat;
5189   PetscInt       m    = *mm, n = *mn;
5190   InsertMode     addv = *maddv;
5191   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5192   PetscScalar    value;
5193   PetscErrorCode ierr;
5194 
5195   MatCheckPreallocated(mat,1);
5196   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5197 
5198 #if defined(PETSC_USE_DEBUG)
5199   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5200 #endif
5201   {
5202     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5203     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5204     PetscBool roworiented = aij->roworiented;
5205 
5206     /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5207     Mat        A                 = aij->A;
5208     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5209     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5210     MatScalar  *aa               = a->a;
5211     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5212     Mat        B                 = aij->B;
5213     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5214     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5215     MatScalar  *ba               = b->a;
5216 
5217     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5218     PetscInt  nonew = a->nonew;
5219     MatScalar *ap1,*ap2;
5220 
5221     PetscFunctionBegin;
5222     for (i=0; i<m; i++) {
5223       if (im[i] < 0) continue;
5224 #if defined(PETSC_USE_DEBUG)
5225       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5226 #endif
5227       if (im[i] >= rstart && im[i] < rend) {
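        /* Row im[i] is owned by this process: set up pointers into this row of the diagonal (A) and
           off-diagonal (B) blocks for the insertion macros below */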
5228         row      = im[i] - rstart;
5229         lastcol1 = -1;
5230         rp1      = aj + ai[row];
5231         ap1      = aa + ai[row];
5232         rmax1    = aimax[row];
5233         nrow1    = ailen[row];
5234         low1     = 0;
5235         high1    = nrow1;
5236         lastcol2 = -1;
5237         rp2      = bj + bi[row];
5238         ap2      = ba + bi[row];
5239         rmax2    = bimax[row];
5240         nrow2    = bilen[row];
5241         low2     = 0;
5242         high2    = nrow2;
5243 
5244         for (j=0; j<n; j++) {
5245           if (roworiented) value = v[i*n+j];
5246           else value = v[i+j*m];
5247           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5248           if (in[j] >= cstart && in[j] < cend) {
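            /* Column in[j] is owned by this process: convert to a local column index and insert into the diagonal block A */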
5249             col = in[j] - cstart;
5250             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5251           } else if (in[j] < 0) continue;
5252 #if defined(PETSC_USE_DEBUG)
5253           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5254 #endif
5255           else {
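            /* Column in[j] is owned by another process: insert into the off-diagonal block B; if the matrix was
               previously assembled, translate the global column index through aij->colmap, disassembling B when a
               new nonzero location must be introduced */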
5256             if (mat->was_assembled) {
5257               if (!aij->colmap) {
5258                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5259               }
5260 #if defined(PETSC_USE_CTABLE)
5261               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5262               col--;
5263 #else
5264               col = aij->colmap[in[j]] - 1;
5265 #endif
5266               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5267                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5268                 col  =  in[j];
5269                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5270                 B     = aij->B;
5271                 b     = (Mat_SeqAIJ*)B->data;
5272                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5273                 ba    = b->a; /* must be refreshed before computing ap2: MatDisAssemble_MPIAIJ() replaced B and its arrays */
5274                 rp2   = bj + bi[row];
5275                 ap2   = ba + bi[row];
5276                 rmax2 = bimax[row];
5277                 nrow2 = bilen[row];
5278                 low2  = 0;
5279                 high2 = nrow2;
5280                 bm    = aij->B->rmap->n;
5281               }
5282             } else col = in[j];
5283             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5284           }
5285         }
5286       } else if (!aij->donotstash) {
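        /* Row im[i] is owned by another process: stash the values locally; they are communicated to the owning
           process during MatAssemblyBegin()/MatAssemblyEnd() */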
5287         if (roworiented) {
5288           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5289         } else {
5290           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5291         }
5292       }
5293     }
5294   }
5295   PetscFunctionReturnVoid();
5296 }
5297 
5298