xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision dced61a5cfeeeda68dfa4ee3e53b34d3cfb8da9f)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise.  As a result, for single process communicators,
13  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
14   for communicators controlling multiple processes.  It is recommended that you call both of
15   the above preallocation routines for simplicity.
16 
17    Options Database Keys:
18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19 
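   Example Usage (a minimal sketch; comm, M, N and the nonzero estimates 10 and 5 are illustrative only):
     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,10,NULL);           takes effect on a single-process communicator
     MatMPIAIJSetPreallocation(A,10,NULL,5,NULL);    takes effect on a multi-process communicator
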
20   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; these subclasses also automatically switch
21    over to using inodes when enough exist.
22 
23   Level: beginner
24 
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
26 M*/
27 
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30 
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34   for communicators controlling multiple processes.  It is recommended that you call both of
35   the above preallocation routines for simplicity.
36 
37    Options Database Keys:
38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39 
40   Level: beginner
41 
42 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
48 {
49   PetscErrorCode  ierr;
50   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
51   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
52   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
53   const PetscInt  *ia,*ib;
54   const MatScalar *aa,*bb;
55   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
56   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
57 
58   PetscFunctionBegin;
59   *keptrows = 0;
60   ia        = a->i;
61   ib        = b->i;
62   for (i=0; i<m; i++) {
63     na = ia[i+1] - ia[i];
64     nb = ib[i+1] - ib[i];
65     if (!na && !nb) {
66       cnt++;
67       goto ok1;
68     }
69     aa = a->a + ia[i];
70     for (j=0; j<na; j++) {
71       if (aa[j] != 0.0) goto ok1;
72     }
73     bb = b->a + ib[i];
74     for (j=0; j <nb; j++) {
75       if (bb[j] != 0.0) goto ok1;
76     }
77     cnt++;
78 ok1:;
79   }
80   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
81   if (!n0rows) PetscFunctionReturn(0);
82   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
83   cnt  = 0;
84   for (i=0; i<m; i++) {
85     na = ia[i+1] - ia[i];
86     nb = ib[i+1] - ib[i];
87     if (!na && !nb) continue;
88     aa = a->a + ia[i];
89     for (j=0; j<na;j++) {
90       if (aa[j] != 0.0) {
91         rows[cnt++] = rstart + i;
92         goto ok2;
93       }
94     }
95     bb = b->a + ib[i];
96     for (j=0; j<nb; j++) {
97       if (bb[j] != 0.0) {
98         rows[cnt++] = rstart + i;
99         goto ok2;
100       }
101     }
102 ok2:;
103   }
104   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
110 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
111 {
112   PetscErrorCode    ierr;
113   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
114 
115   PetscFunctionBegin;
116   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
117     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
118   } else {
119     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
128 {
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
130   PetscErrorCode ierr;
131   PetscInt       i,rstart,nrows,*rows;
132 
133   PetscFunctionBegin;
134   *zrows = NULL;
135   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
136   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
137   for (i=0; i<nrows; i++) rows[i] += rstart;
138   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
145 {
146   PetscErrorCode ierr;
147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
148   PetscInt       i,n,*garray = aij->garray;
149   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
150   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
151   PetscReal      *work;
152 
153   PetscFunctionBegin;
154   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
155   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
156   if (type == NORM_2) {
157     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
158       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
159     }
160     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
161       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
162     }
163   } else if (type == NORM_1) {
164     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
165       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
166     }
167     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
168       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
169     }
170   } else if (type == NORM_INFINITY) {
171     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
172       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
173     }
174     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
175       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
176     }
177 
178   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
179   if (type == NORM_INFINITY) {
180     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
181   } else {
182     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
183   }
184   ierr = PetscFree(work);CHKERRQ(ierr);
185   if (type == NORM_2) {
186     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
194 {
195   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
196   IS              sis,gis;
197   PetscErrorCode  ierr;
198   const PetscInt  *isis,*igis;
199   PetscInt        n,*iis,nsis,ngis,rstart,i;
200 
201   PetscFunctionBegin;
202   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
203   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
204   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
205   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
206   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
207   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
208 
209   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
210   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
211   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
212   n    = ngis + nsis;
213   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
214   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
215   for (i=0; i<n; i++) iis[i] += rstart;
216   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
217 
218   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
219   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
220   ierr = ISDestroy(&sis);CHKERRQ(ierr);
221   ierr = ISDestroy(&gis);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
225 #undef __FUNCT__
226 #define __FUNCT__ "MatDistribute_MPIAIJ"
227 /*
228     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
229     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
230 
231     Only for square matrices
232 
233     Used by a preconditioner, hence PETSC_EXTERN
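
    A minimal calling sketch (illustrative only; gseq is assumed to be a valid MATSEQAIJ matrix on
    every process, whose numerical values on rank 0 are the ones distributed, and m is the number
    of local rows this process should own):

      Mat dist;
      MatDistribute_MPIAIJ(comm,gseq,m,MAT_INITIAL_MATRIX,&dist);
      ...
      MatDistribute_MPIAIJ(comm,gseq,m,MAT_REUSE_MATRIX,&dist);    later call: move over new numerical values only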
234 */
235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
236 {
237   PetscMPIInt    rank,size;
238   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
239   PetscErrorCode ierr;
240   Mat            mat;
241   Mat_SeqAIJ     *gmata;
242   PetscMPIInt    tag;
243   MPI_Status     status;
244   PetscBool      aij;
245   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
246 
247   PetscFunctionBegin;
248   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
249   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
250   if (!rank) {
251     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
252     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
253   }
254   if (reuse == MAT_INITIAL_MATRIX) {
255     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
256     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
257     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
258     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
259     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
260     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
261     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
262     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
263     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
264 
265     rowners[0] = 0;
266     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
267     rstart = rowners[rank];
268     rend   = rowners[rank+1];
269     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
270     if (!rank) {
271       gmata = (Mat_SeqAIJ*) gmat->data;
272       /* send row lengths to all processors */
273       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
274       for (i=1; i<size; i++) {
275         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
276       }
277       /* determine the number of diagonal and off-diagonal entries in each local row */
278       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
279       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
280       jj   = 0;
281       for (i=0; i<m; i++) {
282         for (j=0; j<dlens[i]; j++) {
283           if (gmata->j[jj] < rstart) ld[i]++;
284           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
285           jj++;
286         }
287       }
288       /* send column indices to other processes */
289       for (i=1; i<size; i++) {
290         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
291         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
292         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293       }
294 
295       /* send numerical values to other processes */
296       for (i=1; i<size; i++) {
297         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
298         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
299       }
300       gmataa = gmata->a;
301       gmataj = gmata->j;
302 
303     } else {
304       /* receive row lengths */
305       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
306       /* receive column indices */
307       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
308       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
309       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* determine the number of diagonal and off-diagonal entries in each local row */
311       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
312       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
313       jj   = 0;
314       for (i=0; i<m; i++) {
315         for (j=0; j<dlens[i]; j++) {
316           if (gmataj[jj] < rstart) ld[i]++;
317           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
318           jj++;
319         }
320       }
321       /* receive numerical values */
322       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* set preallocation */
326     for (i=0; i<m; i++) {
327       dlens[i] -= olens[i];
328     }
329     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
330     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
331 
332     for (i=0; i<m; i++) {
333       dlens[i] += olens[i];
334     }
335     cnt = 0;
336     for (i=0; i<m; i++) {
337       row  = rstart + i;
338       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
339       cnt += dlens[i];
340     }
341     if (rank) {
342       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
343     }
344     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
345     ierr = PetscFree(rowners);CHKERRQ(ierr);
346 
347     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
348 
349     *inmat = mat;
350   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
351     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
352     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
353     mat  = *inmat;
354     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
355     if (!rank) {
356       /* send numerical values to other processes */
357       gmata  = (Mat_SeqAIJ*) gmat->data;
358       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
359       gmataa = gmata->a;
360       for (i=1; i<size; i++) {
361         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
362         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
363       }
364       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
365     } else {
366       /* receive numerical values from process 0 */
367       nz   = Ad->nz + Ao->nz;
368       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
369       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
370     }
371     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
372     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
373     ad = Ad->a;
374     ao = Ao->a;
375     if (mat->rmap->n) {
376       i  = 0;
377       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
378       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
379     }
380     for (i=1; i<mat->rmap->n; i++) {
381       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     i--;
385     if (mat->rmap->n) {
386       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
387     }
388     if (rank) {
389       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
390     }
391   }
392   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
393   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 /*
398   Local utility routine that creates a mapping from the global column
399 number to the local number in the off-diagonal part of the local
400 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
401 a slightly higher hash table cost; without it, it is not scalable (each processor
402 holds an order-N integer array, which is fast to access).
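
  For example, if the off-diagonal block B on this process references the global
  columns garray = {3,7,12}, then colmap records the value 2 (= local index 1 plus
  one) for global column 7; the shift by one lets a lookup result of 0 mean
  "column not present in B".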
403 */
404 #undef __FUNCT__
405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
407 {
408   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscInt       n = aij->B->cmap->n,i;
411 
412   PetscFunctionBegin;
413   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
414 #if defined(PETSC_USE_CTABLE)
415   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
416   for (i=0; i<n; i++) {
417     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
418   }
419 #else
420   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
421   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
422   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
423 #endif
424   PetscFunctionReturn(0);
425 }
426 
427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
428 { \
429     if (col <= lastcol1)  low1 = 0;     \
430     else                 high1 = nrow1; \
431     lastcol1 = col;\
432     while (high1-low1 > 5) { \
433       t = (low1+high1)/2; \
434       if (rp1[t] > col) high1 = t; \
435       else              low1  = t; \
436     } \
437       for (_i=low1; _i<high1; _i++) { \
438         if (rp1[_i] > col) break; \
439         if (rp1[_i] == col) { \
440           if (addv == ADD_VALUES) ap1[_i] += value;   \
441           else                    ap1[_i] = value; \
442           goto a_noinsert; \
443         } \
444       }  \
445       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
446       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
447       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
448       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
449       N = nrow1++ - 1; a->nz++; high1++; \
450       /* shift up all the later entries in this row */ \
451       for (ii=N; ii>=_i; ii--) { \
452         rp1[ii+1] = rp1[ii]; \
453         ap1[ii+1] = ap1[ii]; \
454       } \
455       rp1[_i] = col;  \
456       ap1[_i] = value;  \
457       A->nonzerostate++;\
458       a_noinsert: ; \
459       ailen[row] = nrow1; \
460 }
461 
462 
463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
464   { \
465     if (col <= lastcol2) low2 = 0;                        \
466     else high2 = nrow2;                                   \
467     lastcol2 = col;                                       \
468     while (high2-low2 > 5) {                              \
469       t = (low2+high2)/2;                                 \
470       if (rp2[t] > col) high2 = t;                        \
471       else             low2  = t;                         \
472     }                                                     \
473     for (_i=low2; _i<high2; _i++) {                       \
474       if (rp2[_i] > col) break;                           \
475       if (rp2[_i] == col) {                               \
476         if (addv == ADD_VALUES) ap2[_i] += value;         \
477         else                    ap2[_i] = value;          \
478         goto b_noinsert;                                  \
479       }                                                   \
480     }                                                     \
481     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
482     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
483     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
484     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
485     N = nrow2++ - 1; b->nz++; high2++;                    \
486     /* shift up all the later entries in this row */      \
487     for (ii=N; ii>=_i; ii--) {                            \
488       rp2[ii+1] = rp2[ii];                                \
489       ap2[ii+1] = ap2[ii];                                \
490     }                                                     \
491     rp2[_i] = col;                                        \
492     ap2[_i] = value;                                      \
493     B->nonzerostate++;                                    \
494     b_noinsert: ;                                         \
495     bilen[row] = nrow2;                                   \
496   }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
501 {
502   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
503   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
504   PetscErrorCode ierr;
505   PetscInt       l,*garray = mat->garray,diag;
506 
507   PetscFunctionBegin;
508   /* code only works for square matrices A */
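  /* v is assumed to hold the entire row in ascending global column order; it is split into
     the off-diagonal (B) entries left of the diagonal block, the diagonal-block (A) entries,
     and the remaining off-diagonal (B) entries to the right */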
509 
510   /* find size of row to the left of the diagonal part */
511   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
512   row  = row - diag;
513   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
514     if (garray[b->j[b->i[row]+l]] > diag) break;
515   }
516   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* diagonal part */
519   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
520 
521   /* right of diagonal part */
522   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatSetValues_MPIAIJ"
528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
529 {
530   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
531   PetscScalar    value;
532   PetscErrorCode ierr;
533   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
534   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
535   PetscBool      roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat        A                 = aij->A;
539   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
540   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
541   MatScalar  *aa               = a->a;
542   PetscBool  ignorezeroentries = a->ignorezeroentries;
543   Mat        B                 = aij->B;
544   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
545   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
546   MatScalar  *ba               = b->a;
547 
548   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
549   PetscInt  nonew;
550   MatScalar *ap1,*ap2;
551 
552   PetscFunctionBegin;
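  /* Each (row,column) entry is routed to one of three destinations: the diagonal block A
     (locally owned row and column), the off-diagonal block B (locally owned row, remote
     column), or the stash for rows owned by another process */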
553   for (i=0; i<m; i++) {
554     if (im[i] < 0) continue;
555 #if defined(PETSC_USE_DEBUG)
556     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
557 #endif
558     if (im[i] >= rstart && im[i] < rend) {
559       row      = im[i] - rstart;
560       lastcol1 = -1;
561       rp1      = aj + ai[row];
562       ap1      = aa + ai[row];
563       rmax1    = aimax[row];
564       nrow1    = ailen[row];
565       low1     = 0;
566       high1    = nrow1;
567       lastcol2 = -1;
568       rp2      = bj + bi[row];
569       ap2      = ba + bi[row];
570       rmax2    = bimax[row];
571       nrow2    = bilen[row];
572       low2     = 0;
573       high2    = nrow2;
574 
575       for (j=0; j<n; j++) {
576         if (roworiented) value = v[i*n+j];
577         else             value = v[i+j*m];
578         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
579         if (in[j] >= cstart && in[j] < cend) {
580           col   = in[j] - cstart;
581           nonew = a->nonew;
582           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
583         } else if (in[j] < 0) continue;
584 #if defined(PETSC_USE_DEBUG)
585         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
586 #endif
587         else {
588           if (mat->was_assembled) {
589             if (!aij->colmap) {
590               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
591             }
592 #if defined(PETSC_USE_CTABLE)
593             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
594             col--;
595 #else
596             col = aij->colmap[in[j]] - 1;
597 #endif
598             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
599               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
600               col  =  in[j];
601               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
602               B     = aij->B;
603               b     = (Mat_SeqAIJ*)B->data;
604               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
605               rp2   = bj + bi[row];
606               ap2   = ba + bi[row];
607               rmax2 = bimax[row];
608               nrow2 = bilen[row];
609               low2  = 0;
610               high2 = nrow2;
611               bm    = aij->B->rmap->n;
612               ba    = b->a;
613             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614           } else col = in[j];
615           nonew = b->nonew;
616           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
617         }
618       }
619     } else {
620       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
621       if (!aij->donotstash) {
622         mat->assembled = PETSC_FALSE;
623         if (roworiented) {
624           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
625         } else {
626           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
627         }
628       }
629     }
630   }
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatGetValues_MPIAIJ"
636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
641   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
642 
643   PetscFunctionBegin;
644   for (i=0; i<m; i++) {
645     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
646     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
647     if (idxm[i] >= rstart && idxm[i] < rend) {
648       row = idxm[i] - rstart;
649       for (j=0; j<n; j++) {
650         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
651         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
652         if (idxn[j] >= cstart && idxn[j] < cend) {
653           col  = idxn[j] - cstart;
654           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
655         } else {
656           if (!aij->colmap) {
657             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
658           }
659 #if defined(PETSC_USE_CTABLE)
660           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
661           col--;
662 #else
663           col = aij->colmap[idxn[j]] - 1;
664 #endif
665           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
666           else {
667             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
668           }
669         }
670       }
671     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
681 {
682   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
683   PetscErrorCode ierr;
684   PetscInt       nstash,reallocs;
685   InsertMode     addv;
686 
687   PetscFunctionBegin;
688   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
689 
690   /* make sure all processors are either in INSERTMODE or ADDMODE */
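  /* The bitwise OR over all ranks equals (ADD_VALUES|INSERT_VALUES) only when some ranks
     inserted while others added, which the check below flags as an error */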
691   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
692   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
693   mat->insertmode = addv; /* in case this processor had no cache */
694 
695   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
696   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
697   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
698   PetscFunctionReturn(0);
699 }
700 
701 #undef __FUNCT__
702 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
703 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
704 {
705   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
706   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
707   PetscErrorCode ierr;
708   PetscMPIInt    n;
709   PetscInt       i,j,rstart,ncols,flg;
710   PetscInt       *row,*col;
711   PetscBool      other_disassembled;
712   PetscScalar    *val;
713   InsertMode     addv = mat->insertmode;
714 
715   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
716 
717   PetscFunctionBegin;
718   if (!aij->donotstash && !mat->nooffprocentries) {
719     while (1) {
720       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
721       if (!flg) break;
722 
723       for (i=0; i<n; ) {
724         /* Now identify the consecutive vals belonging to the same row */
725         for (j=i,rstart=row[j]; j<n; j++) {
726           if (row[j] != rstart) break;
727         }
728         if (j < n) ncols = j-i;
729         else       ncols = n-i;
730         /* Now assemble all these values with a single function call */
731         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
732 
733         i = j;
734       }
735     }
736     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
737   }
738   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
739   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
740 
741   /* determine if any processor has disassembled; if so we must
742      also disassemble ourselves, in order that we may reassemble. */
743   /*
744      if nonzero structure of submatrix B cannot change then we know that
745      no processor disassembled thus we can skip this stuff
746   */
747   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
748     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
749     if (mat->was_assembled && !other_disassembled) {
750       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
751     }
752   }
753   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
754     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
755   }
756   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
757   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
758   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
759 
760   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
761 
762   aij->rowvalues = 0;
763 
764   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
765   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
766 
767   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
768   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
769     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
770     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
771   }
772   PetscFunctionReturn(0);
773 }
774 
775 #undef __FUNCT__
776 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
777 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
778 {
779   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
780   PetscErrorCode ierr;
781 
782   PetscFunctionBegin;
783   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
784   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
785   PetscFunctionReturn(0);
786 }
787 
788 #undef __FUNCT__
789 #define __FUNCT__ "MatZeroRows_MPIAIJ"
790 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
791 {
792   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
793   PetscInt      *owners = A->rmap->range;
794   PetscInt       n      = A->rmap->n;
795   PetscSF        sf;
796   PetscInt      *lrows;
797   PetscSFNode   *rrows;
798   PetscInt       r, p = 0, len = 0;
799   PetscErrorCode ierr;
800 
801   PetscFunctionBegin;
802   /* Create SF where leaves are input rows and roots are owned rows */
803   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
804   for (r = 0; r < n; ++r) lrows[r] = -1;
805   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
806   for (r = 0; r < N; ++r) {
807     const PetscInt idx   = rows[r];
808     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
809     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
810       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
811     }
812     if (A->nooffproczerorows) {
813       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
814       lrows[len++] = idx - owners[p];
815     } else {
816       rrows[r].rank = p;
817       rrows[r].index = rows[r] - owners[p];
818     }
819   }
820   if (!A->nooffproczerorows) {
821     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
822     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
823     /* Collect flags for rows to be zeroed */
824     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
825     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
826     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
827     /* Compress and put in row numbers */
828     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
829   }
830   /* fix right hand side if needed */
831   if (x && b) {
832     const PetscScalar *xx;
833     PetscScalar       *bb;
834 
835     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
836     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
837     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
838     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
839     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
840   }
841   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
842   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
843   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
844     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
845   } else if (diag != 0.0) {
846     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
847     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
848     for (r = 0; r < len; ++r) {
849       const PetscInt row = lrows[r] + A->rmap->rstart;
850       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
851     }
852     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
853     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
854   } else {
855     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
856   }
857   ierr = PetscFree(lrows);CHKERRQ(ierr);
858 
859   /* only change matrix nonzero state if pattern was allowed to be changed */
860   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
861     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
862     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
863   }
864   PetscFunctionReturn(0);
865 }
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
869 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
870 {
871   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
872   PetscErrorCode    ierr;
873   PetscMPIInt       n = A->rmap->n;
874   PetscInt          i,j,r,m,p = 0,len = 0;
875   PetscInt          *lrows,*owners = A->rmap->range;
876   PetscSFNode       *rrows;
877   PetscSF           sf;
878   const PetscScalar *xx;
879   PetscScalar       *bb,*mask;
880   Vec               xmask,lmask;
881   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
882   const PetscInt    *aj, *ii,*ridx;
883   PetscScalar       *aa;
884 
885   PetscFunctionBegin;
886   /* Create SF where leaves are input rows and roots are owned rows */
887   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
888   for (r = 0; r < n; ++r) lrows[r] = -1;
889   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
890   for (r = 0; r < N; ++r) {
891     const PetscInt idx   = rows[r];
892     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
893     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
894       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
895     }
896     rrows[r].rank  = p;
897     rrows[r].index = rows[r] - owners[p];
898   }
899   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
900   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
901   /* Collect flags for rows to be zeroed */
902   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
903   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
904   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
905   /* Compress and put in row numbers */
906   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
907   /* zero diagonal part of matrix */
908   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
909   /* handle off diagonal part of matrix */
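  /* Mark the locally zeroed rows with 1 in xmask and scatter into lmask, so each process
     learns which of its ghost columns (columns of B) correspond to globally zeroed rows */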
910   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
911   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
912   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
913   for (i=0; i<len; i++) bb[lrows[i]] = 1;
914   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
915   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
917   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
918   if (x) {
919     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
920     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
921     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
922     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
923   }
924   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
925   /* remove zeroed rows of off diagonal matrix */
926   ii = aij->i;
927   for (i=0; i<len; i++) {
928     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
929   }
930   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
931   if (aij->compressedrow.use) {
932     m    = aij->compressedrow.nrows;
933     ii   = aij->compressedrow.i;
934     ridx = aij->compressedrow.rindex;
935     for (i=0; i<m; i++) {
936       n  = ii[i+1] - ii[i];
937       aj = aij->j + ii[i];
938       aa = aij->a + ii[i];
939 
940       for (j=0; j<n; j++) {
941         if (PetscAbsScalar(mask[*aj])) {
942           if (b) bb[*ridx] -= *aa*xx[*aj];
943           *aa = 0.0;
944         }
945         aa++;
946         aj++;
947       }
948       ridx++;
949     }
950   } else { /* do not use compressed row format */
951     m = l->B->rmap->n;
952     for (i=0; i<m; i++) {
953       n  = ii[i+1] - ii[i];
954       aj = aij->j + ii[i];
955       aa = aij->a + ii[i];
956       for (j=0; j<n; j++) {
957         if (PetscAbsScalar(mask[*aj])) {
958           if (b) bb[i] -= *aa*xx[*aj];
959           *aa = 0.0;
960         }
961         aa++;
962         aj++;
963       }
964     }
965   }
966   if (x) {
967     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
968     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
969   }
970   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
971   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
972   ierr = PetscFree(lrows);CHKERRQ(ierr);
973 
974   /* only change matrix nonzero state if pattern was allowed to be changed */
975   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
976     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
977     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
978   }
979   PetscFunctionReturn(0);
980 }
981 
982 #undef __FUNCT__
983 #define __FUNCT__ "MatMult_MPIAIJ"
984 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
985 {
986   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
987   PetscErrorCode ierr;
988   PetscInt       nt;
989 
990   PetscFunctionBegin;
991   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
992   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
993   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
994   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
995   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
996   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
997   PetscFunctionReturn(0);
998 }
999 
1000 #undef __FUNCT__
1001 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1002 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1003 {
1004   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1005   PetscErrorCode ierr;
1006 
1007   PetscFunctionBegin;
1008   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 #undef __FUNCT__
1013 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1014 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1015 {
1016   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1017   PetscErrorCode ierr;
1018 
1019   PetscFunctionBegin;
1020   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1021   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1022   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1023   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1029 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1030 {
1031   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1032   PetscErrorCode ierr;
1033   PetscBool      merged;
1034 
1035   PetscFunctionBegin;
1036   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1037   /* do nondiagonal part */
1038   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1039   if (!merged) {
1040     /* send it on its way */
1041     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1042     /* do local part */
1043     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1044     /* receive remote parts: note this assumes the values are not actually */
1045     /* added into yy until the VecScatterEnd() below */
1046     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1047   } else {
1048     /* do local part */
1049     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1050     /* send it on its way */
1051     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1052     /* values actually were received in the Begin() but we need to call this nop */
1053     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1054   }
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 #undef __FUNCT__
1059 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1060 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1061 {
1062   MPI_Comm       comm;
1063   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1064   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1065   IS             Me,Notme;
1066   PetscErrorCode ierr;
1067   PetscInt       M,N,first,last,*notme,i;
1068   PetscMPIInt    size;
1069 
1070   PetscFunctionBegin;
1071   /* Easy test: symmetric diagonal block */
1072   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1073   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1074   if (!*f) PetscFunctionReturn(0);
1075   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1076   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1077   if (size == 1) PetscFunctionReturn(0);
1078 
1079   /* Hard test: off-diagonal block. This requires a call to MatGetSubMatrices(). */
1080   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1081   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1082   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1083   for (i=0; i<first; i++) notme[i] = i;
1084   for (i=last; i<M; i++) notme[i-last+first] = i;
1085   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1086   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1087   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1088   Aoff = Aoffs[0];
1089   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1090   Boff = Boffs[0];
1091   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1092   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1093   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1094   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1095   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1096   ierr = PetscFree(notme);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1102 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106 
1107   PetscFunctionBegin;
1108   /* do nondiagonal part */
1109   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1110   /* send it on its way */
1111   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1112   /* do local part */
1113   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1114   /* receive remote parts */
1115   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 /*
1120   This only works correctly for square matrices where the subblock A->A is the
1121    diagonal block
1122 */
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1125 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1126 {
1127   PetscErrorCode ierr;
1128   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1129 
1130   PetscFunctionBegin;
1131   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1132   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1133   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNCT__
1138 #define __FUNCT__ "MatScale_MPIAIJ"
1139 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1140 {
1141   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1142   PetscErrorCode ierr;
1143 
1144   PetscFunctionBegin;
1145   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1146   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatDestroy_MPIAIJ"
1152 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1153 {
1154   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1155   PetscErrorCode ierr;
1156 
1157   PetscFunctionBegin;
1158 #if defined(PETSC_USE_LOG)
1159   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1160 #endif
1161   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1162   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1163   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1164   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1165 #if defined(PETSC_USE_CTABLE)
1166   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1167 #else
1168   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1169 #endif
1170   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1171   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1172   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1173   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1174   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1175   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1176 
1177   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1179   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1181   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1182   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1186 #if defined(PETSC_HAVE_ELEMENTAL)
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1188 #endif
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 #undef __FUNCT__
1193 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1194 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1195 {
1196   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1197   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1198   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1199   PetscErrorCode ierr;
1200   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1201   int            fd;
1202   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1203   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1204   PetscScalar    *column_values;
1205   PetscInt       message_count,flowcontrolcount;
1206   FILE           *file;
1207 
1208   PetscFunctionBegin;
1209   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1210   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1211   nz   = A->nz + B->nz;
1212   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1213   if (!rank) {
1214     header[0] = MAT_FILE_CLASSID;
1215     header[1] = mat->rmap->N;
1216     header[2] = mat->cmap->N;
1217 
1218     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1219     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1220     /* get largest number of rows any processor has */
1221     rlen  = mat->rmap->n;
1222     range = mat->rmap->range;
1223     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1224   } else {
1225     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1226     rlen = mat->rmap->n;
1227   }
1228 
1229   /* load up the local row counts */
1230   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1231   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1232 
1233   /* store the row lengths to the file */
1234   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1235   if (!rank) {
1236     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1237     for (i=1; i<size; i++) {
1238       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1239       rlen = range[i+1] - range[i];
1240       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1241       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1242     }
1243     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1244   } else {
1245     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1246     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1247     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1248   }
1249   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1250 
1251   /* load up the local column indices */
1252   nzmax = nz; /* the 0th processor needs space as large as the largest processor needs */
1253   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1254   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1255   cnt   = 0;
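  /* merge each row so its global column indices come out in ascending order: off-diagonal (B)
     columns left of the diagonal block, then diagonal (A) columns shifted by cstart, then the
     remaining off-diagonal columns */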
1256   for (i=0; i<mat->rmap->n; i++) {
1257     for (j=B->i[i]; j<B->i[i+1]; j++) {
1258       if ((col = garray[B->j[j]]) > cstart) break;
1259       column_indices[cnt++] = col;
1260     }
1261     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1262     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1263   }
1264   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1265 
1266   /* store the column indices to the file */
1267   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1268   if (!rank) {
1269     MPI_Status status;
1270     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1271     for (i=1; i<size; i++) {
1272       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1273       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1274       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1275       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1276       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1277     }
1278     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1279   } else {
1280     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1281     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1282     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1283     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1284   }
1285   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1286 
1287   /* load up the local column values */
1288   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1289   cnt  = 0;
1290   for (i=0; i<mat->rmap->n; i++) {
1291     for (j=B->i[i]; j<B->i[i+1]; j++) {
1292       if (garray[B->j[j]] > cstart) break;
1293       column_values[cnt++] = B->a[j];
1294     }
1295     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1296     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1297   }
1298   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1299 
1300   /* store the column values to the file */
1301   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1302   if (!rank) {
1303     MPI_Status status;
1304     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1305     for (i=1; i<size; i++) {
1306       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1307       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1308       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1309       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1310       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1311     }
1312     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1313   } else {
1314     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1315     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1316     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1317     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1318   }
1319   ierr = PetscFree(column_values);CHKERRQ(ierr);
1320 
1321   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1322   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1323   PetscFunctionReturn(0);
1324 }
1325 
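/*
   Notes on the binary layout produced by MatView_MPIAIJ_Binary() above (a summary of the code,
   not an independent format specification): rank 0 writes the 4-entry header (MAT_FILE_CLASSID,
   global rows, global columns, total nonzero count from the MPI_Reduce), then all row lengths,
   then all global column indices, then all values, each gathered from the other ranks in rank
   order under flow control.  A minimal usage sketch that exercises this path (the file name is
   arbitrary; error checking abbreviated):

      Mat         A;                  an assembled MATMPIAIJ matrix
      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
      ierr = MatView(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   The file can later be read back with MatLoad() on a matrix of type MATMPIAIJ.
*/
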
1326 #include <petscdraw.h>
1327 #undef __FUNCT__
1328 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1329 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1330 {
1331   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1332   PetscErrorCode    ierr;
1333   PetscMPIInt       rank = aij->rank,size = aij->size;
1334   PetscBool         isdraw,iascii,isbinary;
1335   PetscViewer       sviewer;
1336   PetscViewerFormat format;
1337 
1338   PetscFunctionBegin;
1339   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1340   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1341   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1342   if (iascii) {
1343     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1344     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1345       MatInfo   info;
1346       PetscBool inodes;
1347 
1348       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1349       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1350       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1351       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1352       if (!inodes) {
1353         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1354                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1355       } else {
1356         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1357                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1358       }
1359       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1360       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1361       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1362       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1363       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1364       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1365       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1366       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1367       PetscFunctionReturn(0);
1368     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1369       PetscInt inodecount,inodelimit,*inodes;
1370       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1371       if (inodes) {
1372         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1373       } else {
1374         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1375       }
1376       PetscFunctionReturn(0);
1377     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1378       PetscFunctionReturn(0);
1379     }
1380   } else if (isbinary) {
1381     if (size == 1) {
1382       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1383       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1384     } else {
1385       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1386     }
1387     PetscFunctionReturn(0);
1388   } else if (isdraw) {
1389     PetscDraw draw;
1390     PetscBool isnull;
1391     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1392     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1393   }
1394 
1395   {
1396     /* assemble the entire matrix onto first processor. */
1397     Mat        A;
1398     Mat_SeqAIJ *Aloc;
1399     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1400     MatScalar  *a;
1401 
1402     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1403     if (!rank) {
1404       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1405     } else {
1406       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1407     }
1408     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1409     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1410     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1411     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1412     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1413 
1414     /* copy over the A part */
1415     Aloc = (Mat_SeqAIJ*)aij->A->data;
1416     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1417     row  = mat->rmap->rstart;
1418     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1419     for (i=0; i<m; i++) {
1420       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1421       row++;
1422       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1423     }
1424     aj = Aloc->j;
1425     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1426 
1427     /* copy over the B part */
1428     Aloc = (Mat_SeqAIJ*)aij->B->data;
1429     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1430     row  = mat->rmap->rstart;
1431     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1432     ct   = cols;
1433     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1434     for (i=0; i<m; i++) {
1435       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1436       row++;
1437       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1438     }
1439     ierr = PetscFree(ct);CHKERRQ(ierr);
1440     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1441     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1442     /*
1443        Every process must participate in drawing the matrix since the graphics waits are
1444        synchronized across all processes that share the PetscDraw object
1445     */
1446     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1447     if (!rank) {
1448       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1449       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1450     }
1451     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1452     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1453     ierr = MatDestroy(&A);CHKERRQ(ierr);
1454   }
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatView_MPIAIJ"
1460 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1461 {
1462   PetscErrorCode ierr;
1463   PetscBool      iascii,isdraw,issocket,isbinary;
1464 
1465   PetscFunctionBegin;
1466   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1467   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1468   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1469   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1470   if (iascii || isdraw || isbinary || issocket) {
1471     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1472   }
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 #undef __FUNCT__
1477 #define __FUNCT__ "MatSOR_MPIAIJ"
1478 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1479 {
1480   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1481   PetscErrorCode ierr;
1482   Vec            bb1 = 0;
1483   PetscBool      hasop;
1484 
1485   PetscFunctionBegin;
1486   if (flag == SOR_APPLY_UPPER) {
1487     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1488     PetscFunctionReturn(0);
1489   }
1490 
1491   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1492     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1493   }
1494 
1495   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1496     if (flag & SOR_ZERO_INITIAL_GUESS) {
1497       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1498       its--;
1499     }
1500 
1501     while (its--) {
1502       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1503       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1504 
1505       /* update rhs: bb1 = bb - B*x */
1506       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1507       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1508 
1509       /* local sweep */
1510       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1511     }
1512   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1513     if (flag & SOR_ZERO_INITIAL_GUESS) {
1514       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1515       its--;
1516     }
1517     while (its--) {
1518       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1519       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1520 
1521       /* update rhs: bb1 = bb - B*x */
1522       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1523       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1524 
1525       /* local sweep */
1526       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1527     }
1528   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1529     if (flag & SOR_ZERO_INITIAL_GUESS) {
1530       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1531       its--;
1532     }
1533     while (its--) {
1534       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1535       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1536 
1537       /* update rhs: bb1 = bb - B*x */
1538       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1539       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1540 
1541       /* local sweep */
1542       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1543     }
1544   } else if (flag & SOR_EISENSTAT) {
1545     Vec xx1;
1546 
1547     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1548     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1549 
1550     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1551     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1552     if (!mat->diag) {
1553       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1554       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1555     }
1556     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1557     if (hasop) {
1558       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1559     } else {
1560       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1561     }
1562     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1563 
1564     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1565 
1566     /* local sweep */
1567     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1568     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1569     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1570   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1571 
1572   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1573   PetscFunctionReturn(0);
1574 }
1575 
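/*
   MatSOR_MPIAIJ() above performs only process-local sweeps, folding the off-diagonal coupling
   B*x into the right-hand side between sweeps; a true parallel SOR is not supported (see the
   final SETERRQ).  It is normally reached through the SOR preconditioner rather than called
   directly.  A minimal sketch, assuming ksp has already been created and given the matrix
   (equivalent options: -pc_type sor -pc_sor_local_symmetric -pc_sor_omega 1.5):

      PC pc;
      ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
      ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
      ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);
      ierr = PCSORSetOmega(pc,1.5);CHKERRQ(ierr);
*/
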
1576 #undef __FUNCT__
1577 #define __FUNCT__ "MatPermute_MPIAIJ"
1578 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1579 {
1580   Mat            aA,aB,Aperm;
1581   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1582   PetscScalar    *aa,*ba;
1583   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1584   PetscSF        rowsf,sf;
1585   IS             parcolp = NULL;
1586   PetscBool      done;
1587   PetscErrorCode ierr;
1588 
1589   PetscFunctionBegin;
1590   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1591   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1592   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1593   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1594 
1595   /* Invert row permutation to find out where my rows should go */
1596   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1597   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1598   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1599   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1600   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1601   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1602 
1603   /* Invert column permutation to find out where my columns should go */
1604   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1605   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1606   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1607   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1608   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1609   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1610   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1611 
1612   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1613   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1614   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1615 
1616   /* Find out where my gcols should go */
1617   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1618   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1619   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1620   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1621   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1622   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1623   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1624   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1625 
1626   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1627   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1628   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1629   for (i=0; i<m; i++) {
1630     PetscInt row = rdest[i],rowner;
1631     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1632     for (j=ai[i]; j<ai[i+1]; j++) {
1633       PetscInt cowner,col = cdest[aj[j]];
1634       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1635       if (rowner == cowner) dnnz[i]++;
1636       else onnz[i]++;
1637     }
1638     for (j=bi[i]; j<bi[i+1]; j++) {
1639       PetscInt cowner,col = gcdest[bj[j]];
1640       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1641       if (rowner == cowner) dnnz[i]++;
1642       else onnz[i]++;
1643     }
1644   }
1645   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1646   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1647   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1648   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1649   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1650 
1651   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1652   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1653   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1654   for (i=0; i<m; i++) {
1655     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1656     PetscInt j0,rowlen;
1657     rowlen = ai[i+1] - ai[i];
1658     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of local rows m (the scratch array length), so copy in batches of at most m */
1659       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1660       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1661     }
1662     rowlen = bi[i+1] - bi[i];
1663     for (j0=j=0; j<rowlen; j0=j) {
1664       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1665       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1666     }
1667   }
1668   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1669   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1670   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1671   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1672   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1673   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1674   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1675   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1676   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1677   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1678   *B = Aperm;
1679   PetscFunctionReturn(0);
1680 }
1681 
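/*
   A minimal usage sketch for the permutation above (MatPermute() is the public entry point;
   nlocal and perm[] here are hypothetical caller-supplied data describing this process's share
   of the permutation -- see the MatPermute() manual page for the exact convention; a square
   matrix is assumed so the row permutation can be reused for the columns):

      IS  rowp,colp;
      Mat B;
      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),nlocal,perm,PETSC_COPY_VALUES,&rowp);CHKERRQ(ierr);
      ierr = ISDuplicate(rowp,&colp);CHKERRQ(ierr);
      ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
      ierr = ISDestroy(&rowp);CHKERRQ(ierr);
      ierr = ISDestroy(&colp);CHKERRQ(ierr);
*/
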
1682 #undef __FUNCT__
1683 #define __FUNCT__ "MatGetGhosts_MPIAIJ"
1684 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1685 {
1686   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1687   PetscErrorCode ierr;
1688 
1689   PetscFunctionBegin;
1690   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1691   if (ghosts) *ghosts = aij->garray;
1692   PetscFunctionReturn(0);
1693 }
1694 
1695 #undef __FUNCT__
1696 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1697 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1698 {
1699   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1700   Mat            A    = mat->A,B = mat->B;
1701   PetscErrorCode ierr;
1702   PetscReal      isend[5],irecv[5];
1703 
1704   PetscFunctionBegin;
1705   info->block_size = 1.0;
1706   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1707 
1708   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1709   isend[3] = info->memory;  isend[4] = info->mallocs;
1710 
1711   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1712 
1713   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1714   isend[3] += info->memory;  isend[4] += info->mallocs;
1715   if (flag == MAT_LOCAL) {
1716     info->nz_used      = isend[0];
1717     info->nz_allocated = isend[1];
1718     info->nz_unneeded  = isend[2];
1719     info->memory       = isend[3];
1720     info->mallocs      = isend[4];
1721   } else if (flag == MAT_GLOBAL_MAX) {
1722     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1723 
1724     info->nz_used      = irecv[0];
1725     info->nz_allocated = irecv[1];
1726     info->nz_unneeded  = irecv[2];
1727     info->memory       = irecv[3];
1728     info->mallocs      = irecv[4];
1729   } else if (flag == MAT_GLOBAL_SUM) {
1730     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1731 
1732     info->nz_used      = irecv[0];
1733     info->nz_allocated = irecv[1];
1734     info->nz_unneeded  = irecv[2];
1735     info->memory       = irecv[3];
1736     info->mallocs      = irecv[4];
1737   }
1738   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1739   info->fill_ratio_needed = 0;
1740   info->factor_mallocs    = 0;
1741   PetscFunctionReturn(0);
1742 }
1743 
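/*
   A minimal usage sketch for the info gathering above: MAT_LOCAL reports only this process,
   while MAT_GLOBAL_MAX and MAT_GLOBAL_SUM trigger the MPI_Allreduce branches.

      MatInfo info;
      ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g, mallocs %g\n",
                         info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/
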
1744 #undef __FUNCT__
1745 #define __FUNCT__ "MatSetOption_MPIAIJ"
1746 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1747 {
1748   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1749   PetscErrorCode ierr;
1750 
1751   PetscFunctionBegin;
1752   switch (op) {
1753   case MAT_NEW_NONZERO_LOCATIONS:
1754   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1755   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1756   case MAT_KEEP_NONZERO_PATTERN:
1757   case MAT_NEW_NONZERO_LOCATION_ERR:
1758   case MAT_USE_INODES:
1759   case MAT_IGNORE_ZERO_ENTRIES:
1760     MatCheckPreallocated(A,1);
1761     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1762     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1763     break;
1764   case MAT_ROW_ORIENTED:
1765     a->roworiented = flg;
1766 
1767     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1768     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1769     break;
1770   case MAT_NEW_DIAGONALS:
1771     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1772     break;
1773   case MAT_IGNORE_OFF_PROC_ENTRIES:
1774     a->donotstash = flg;
1775     break;
1776   case MAT_SPD:
1777     A->spd_set = PETSC_TRUE;
1778     A->spd     = flg;
1779     if (flg) {
1780       A->symmetric                  = PETSC_TRUE;
1781       A->structurally_symmetric     = PETSC_TRUE;
1782       A->symmetric_set              = PETSC_TRUE;
1783       A->structurally_symmetric_set = PETSC_TRUE;
1784     }
1785     break;
1786   case MAT_SYMMETRIC:
1787     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1788     break;
1789   case MAT_STRUCTURALLY_SYMMETRIC:
1790     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1791     break;
1792   case MAT_HERMITIAN:
1793     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1794     break;
1795   case MAT_SYMMETRY_ETERNAL:
1796     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1797     break;
1798   default:
1799     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1800   }
1801   PetscFunctionReturn(0);
1802 }
1803 
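/*
   A minimal usage sketch for the options handled above; both calls are plain MatSetOption()
   usage, and the MAT_SPD case also marks the matrix symmetric and structurally symmetric as
   done in the switch:

      ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
*/
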
1804 #undef __FUNCT__
1805 #define __FUNCT__ "MatGetRow_MPIAIJ"
1806 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1807 {
1808   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1809   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1810   PetscErrorCode ierr;
1811   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1812   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1813   PetscInt       *cmap,*idx_p;
1814 
1815   PetscFunctionBegin;
1816   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1817   mat->getrowactive = PETSC_TRUE;
1818 
1819   if (!mat->rowvalues && (idx || v)) {
1820     /*
1821         allocate enough space to hold information from the longest row.
1822     */
1823     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1824     PetscInt   max = 1,tmp;
1825     for (i=0; i<matin->rmap->n; i++) {
1826       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1827       if (max < tmp) max = tmp;
1828     }
1829     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1830   }
1831 
1832   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1833   lrow = row - rstart;
1834 
1835   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1836   if (!v)   {pvA = 0; pvB = 0;}
1837   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1838   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1839   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1840   nztot = nzA + nzB;
1841 
1842   cmap = mat->garray;
1843   if (v  || idx) {
1844     if (nztot) {
1845       /* Sort by increasing column numbers, assuming A and B already sorted */
1846       PetscInt imark = -1;
1847       if (v) {
1848         *v = v_p = mat->rowvalues;
1849         for (i=0; i<nzB; i++) {
1850           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1851           else break;
1852         }
1853         imark = i;
1854         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1855         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1856       }
1857       if (idx) {
1858         *idx = idx_p = mat->rowindices;
1859         if (imark > -1) {
1860           for (i=0; i<imark; i++) {
1861             idx_p[i] = cmap[cworkB[i]];
1862           }
1863         } else {
1864           for (i=0; i<nzB; i++) {
1865             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1866             else break;
1867           }
1868           imark = i;
1869         }
1870         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1871         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1872       }
1873     } else {
1874       if (idx) *idx = 0;
1875       if (v)   *v   = 0;
1876     }
1877   }
1878   *nz  = nztot;
1879   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1880   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1881   PetscFunctionReturn(0);
1882 }
1883 
1884 #undef __FUNCT__
1885 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1886 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1887 {
1888   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1889 
1890   PetscFunctionBegin;
1891   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1892   aij->getrowactive = PETSC_FALSE;
1893   PetscFunctionReturn(0);
1894 }
1895 
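/*
   A minimal usage sketch for the row access routines above: only locally owned rows may be
   requested, and each MatGetRow() must be matched by a MatRestoreRow() before the next row is
   fetched, since the returned arrays point into the scratch buffers allocated in
   MatGetRow_MPIAIJ().

      PetscInt          row,rstart,rend,ncols;
      const PetscInt    *cols;
      const PetscScalar *vals;
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
      }
*/
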
1896 #undef __FUNCT__
1897 #define __FUNCT__ "MatNorm_MPIAIJ"
1898 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1899 {
1900   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1901   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1902   PetscErrorCode ierr;
1903   PetscInt       i,j,cstart = mat->cmap->rstart;
1904   PetscReal      sum = 0.0;
1905   MatScalar      *v;
1906 
1907   PetscFunctionBegin;
1908   if (aij->size == 1) {
1909     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1910   } else {
1911     if (type == NORM_FROBENIUS) {
1912       v = amat->a;
1913       for (i=0; i<amat->nz; i++) {
1914         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1915       }
1916       v = bmat->a;
1917       for (i=0; i<bmat->nz; i++) {
1918         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1919       }
1920       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1921       *norm = PetscSqrtReal(*norm);
1922     } else if (type == NORM_1) { /* max column norm */
1923       PetscReal *tmp,*tmp2;
1924       PetscInt  *jj,*garray = aij->garray;
1925       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1926       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1927       *norm = 0.0;
1928       v     = amat->a; jj = amat->j;
1929       for (j=0; j<amat->nz; j++) {
1930         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1931       }
1932       v = bmat->a; jj = bmat->j;
1933       for (j=0; j<bmat->nz; j++) {
1934         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1935       }
1936       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1937       for (j=0; j<mat->cmap->N; j++) {
1938         if (tmp2[j] > *norm) *norm = tmp2[j];
1939       }
1940       ierr = PetscFree(tmp);CHKERRQ(ierr);
1941       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1942     } else if (type == NORM_INFINITY) { /* max row norm */
1943       PetscReal ntemp = 0.0;
1944       for (j=0; j<aij->A->rmap->n; j++) {
1945         v   = amat->a + amat->i[j];
1946         sum = 0.0;
1947         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1948           sum += PetscAbsScalar(*v); v++;
1949         }
1950         v = bmat->a + bmat->i[j];
1951         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1952           sum += PetscAbsScalar(*v); v++;
1953         }
1954         if (sum > ntemp) ntemp = sum;
1955       }
1956       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1957     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1958   }
1959   PetscFunctionReturn(0);
1960 }
1961 
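/*
   A minimal usage sketch for the norms implemented above; NORM_2 is the unsupported case that
   triggers the SETERRQ:

      PetscReal nrm1,nrmf,nrminf;
      ierr = MatNorm(A,NORM_1,&nrm1);CHKERRQ(ierr);           (largest column sum)
      ierr = MatNorm(A,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);
      ierr = MatNorm(A,NORM_INFINITY,&nrminf);CHKERRQ(ierr);  (largest row sum)
*/
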
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatTranspose_MPIAIJ"
1964 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1965 {
1966   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1967   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1968   PetscErrorCode ierr;
1969   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1970   PetscInt       cstart = A->cmap->rstart,ncol;
1971   Mat            B;
1972   MatScalar      *array;
1973 
1974   PetscFunctionBegin;
1975   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1976 
1977   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1978   ai = Aloc->i; aj = Aloc->j;
1979   bi = Bloc->i; bj = Bloc->j;
1980   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1981     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1982     PetscSFNode          *oloc;
1983     PETSC_UNUSED PetscSF sf;
1984 
1985     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1986     /* compute d_nnz for preallocation */
1987     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1988     for (i=0; i<ai[ma]; i++) {
1989       d_nnz[aj[i]]++;
1990       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1991     }
1992     /* compute local off-diagonal contributions */
1993     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1994     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1995     /* map those to global */
1996     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1997     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1998     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1999     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2000     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2001     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2002     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2003 
2004     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2005     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2006     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2007     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2008     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2009     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2010   } else {
2011     B    = *matout;
2012     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2013     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2014   }
2015 
2016   /* copy over the A part */
2017   array = Aloc->a;
2018   row   = A->rmap->rstart;
2019   for (i=0; i<ma; i++) {
2020     ncol = ai[i+1]-ai[i];
2021     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2022     row++;
2023     array += ncol; aj += ncol;
2024   }
2025   aj = Aloc->j;
2026   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
2027 
2028   /* copy over the B part */
2029   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2030   array = Bloc->a;
2031   row   = A->rmap->rstart;
2032   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2033   cols_tmp = cols;
2034   for (i=0; i<mb; i++) {
2035     ncol = bi[i+1]-bi[i];
2036     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2037     row++;
2038     array += ncol; cols_tmp += ncol;
2039   }
2040   ierr = PetscFree(cols);CHKERRQ(ierr);
2041 
2042   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2043   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2044   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2045     *matout = B;
2046   } else {
2047     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2048   }
2049   PetscFunctionReturn(0);
2050 }
2051 
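/*
   A minimal usage sketch for the transpose above: MAT_INITIAL_MATRIX builds a new matrix with
   its own preallocation, while an in-place transpose (square matrices only, as checked at the
   top of the routine) passes the same matrix back with MAT_REUSE_MATRIX:

      Mat At;
      ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
   or, in place for a square matrix,
      ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
*/
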
2052 #undef __FUNCT__
2053 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2054 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2055 {
2056   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2057   Mat            a    = aij->A,b = aij->B;
2058   PetscErrorCode ierr;
2059   PetscInt       s1,s2,s3;
2060 
2061   PetscFunctionBegin;
2062   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2063   if (rr) {
2064     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2065     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2066     /* Overlap communication with computation. */
2067     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2068   }
2069   if (ll) {
2070     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2071     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2072     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2073   }
2074   /* scale the diagonal block */
2075   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2076 
2077   if (rr) {
2078     /* Do a scatter end and then right scale the off-diagonal block */
2079     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2080     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2081   }
2082   PetscFunctionReturn(0);
2083 }
2084 
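/*
   A minimal usage sketch for the scaling above, which computes diag(ll)*A*diag(rr); ll is laid
   out like the rows and rr like the columns, which MatCreateVecs() provides directly:

      Vec l,r;
      ierr = MatCreateVecs(A,&r,&l);CHKERRQ(ierr);
      ierr = VecSet(l,2.0);CHKERRQ(ierr);
      ierr = VecSet(r,0.5);CHKERRQ(ierr);
      ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
      ierr = VecDestroy(&l);CHKERRQ(ierr);
      ierr = VecDestroy(&r);CHKERRQ(ierr);
*/
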
2085 #undef __FUNCT__
2086 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2087 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2088 {
2089   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2090   PetscErrorCode ierr;
2091 
2092   PetscFunctionBegin;
2093   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2094   PetscFunctionReturn(0);
2095 }
2096 
2097 #undef __FUNCT__
2098 #define __FUNCT__ "MatEqual_MPIAIJ"
2099 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2100 {
2101   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2102   Mat            a,b,c,d;
2103   PetscBool      flg;
2104   PetscErrorCode ierr;
2105 
2106   PetscFunctionBegin;
2107   a = matA->A; b = matA->B;
2108   c = matB->A; d = matB->B;
2109 
2110   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2111   if (flg) {
2112     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2113   }
2114   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2115   PetscFunctionReturn(0);
2116 }
2117 
2118 #undef __FUNCT__
2119 #define __FUNCT__ "MatCopy_MPIAIJ"
2120 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2121 {
2122   PetscErrorCode ierr;
2123   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2124   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2125 
2126   PetscFunctionBegin;
2127   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2128   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2129     /* because of the column compression in the off-processor part of the matrix a->B,
2130        the number of columns in a->B and b->B may be different, hence we cannot call
2131        MatCopy() directly on the two parts. If need be, a copy more efficient than
2132        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2133        then copying the submatrices */
2134     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2135   } else {
2136     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2137     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2138   }
2139   PetscFunctionReturn(0);
2140 }
2141 
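/*
   A minimal usage sketch for the copy above: the fast branch requires B to have the same
   nonzero pattern as A (for example when B was obtained with MatDuplicate()); otherwise the
   generic MatCopy_Basic() is used:

      Mat B;
      ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
      ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/
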
2142 #undef __FUNCT__
2143 #define __FUNCT__ "MatSetUp_MPIAIJ"
2144 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2145 {
2146   PetscErrorCode ierr;
2147 
2148   PetscFunctionBegin;
2149   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 /*
2154    Computes the number of nonzeros per row needed for preallocation when X and Y
2155    have different nonzero structure.
2156 */
2157 #undef __FUNCT__
2158 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2159 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2160 {
2161   PetscInt       i,j,k,nzx,nzy;
2162 
2163   PetscFunctionBegin;
2164   /* Set the number of nonzeros in the new matrix */
2165   for (i=0; i<m; i++) {
2166     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2167     nzx = xi[i+1] - xi[i];
2168     nzy = yi[i+1] - yi[i];
2169     nnz[i] = 0;
2170     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2171       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2172       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2173       nnz[i]++;
2174     }
2175     for (; k<nzy; k++) nnz[i]++;
2176   }
2177   PetscFunctionReturn(0);
2178 }
2179 
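/*
   A small worked example of the merge count above: if, for some row, X has global columns
   {0,3,7} and Y has {3,5}, the union is {0,3,5,7}, so nnz for that row is 4; the shared
   column 3 is counted once by the "Skip duplicate" branch.
*/
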
2180 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2181 #undef __FUNCT__
2182 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2183 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2184 {
2185   PetscErrorCode ierr;
2186   PetscInt       m = Y->rmap->N;
2187   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2188   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2189 
2190   PetscFunctionBegin;
2191   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2192   PetscFunctionReturn(0);
2193 }
2194 
2195 #undef __FUNCT__
2196 #define __FUNCT__ "MatAXPY_MPIAIJ"
2197 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2198 {
2199   PetscErrorCode ierr;
2200   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2201   PetscBLASInt   bnz,one=1;
2202   Mat_SeqAIJ     *x,*y;
2203 
2204   PetscFunctionBegin;
2205   if (str == SAME_NONZERO_PATTERN) {
2206     PetscScalar alpha = a;
2207     x    = (Mat_SeqAIJ*)xx->A->data;
2208     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2209     y    = (Mat_SeqAIJ*)yy->A->data;
2210     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2211     x    = (Mat_SeqAIJ*)xx->B->data;
2212     y    = (Mat_SeqAIJ*)yy->B->data;
2213     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2214     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2215     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2216   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzero pattern of X is a subset of Y's */
2217     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2218   } else {
2219     Mat      B;
2220     PetscInt *nnz_d,*nnz_o;
2221     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2222     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2223     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2224     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2225     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2226     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2227     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2228     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2229     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2230     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2231     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2232     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2233     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2234     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2235   }
2236   PetscFunctionReturn(0);
2237 }
2238 
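/*
   A minimal usage sketch for the Y = a*X + Y update above; DIFFERENT_NONZERO_PATTERN takes the
   slowest branch, which rebuilds Y with preallocation large enough for the union of the two
   nonzero patterns:

      ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/
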
2239 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2240 
2241 #undef __FUNCT__
2242 #define __FUNCT__ "MatConjugate_MPIAIJ"
2243 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2244 {
2245 #if defined(PETSC_USE_COMPLEX)
2246   PetscErrorCode ierr;
2247   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2248 
2249   PetscFunctionBegin;
2250   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2251   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2252 #else
2253   PetscFunctionBegin;
2254 #endif
2255   PetscFunctionReturn(0);
2256 }
2257 
2258 #undef __FUNCT__
2259 #define __FUNCT__ "MatRealPart_MPIAIJ"
2260 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2261 {
2262   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2263   PetscErrorCode ierr;
2264 
2265   PetscFunctionBegin;
2266   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2267   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2268   PetscFunctionReturn(0);
2269 }
2270 
2271 #undef __FUNCT__
2272 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2273 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2274 {
2275   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2276   PetscErrorCode ierr;
2277 
2278   PetscFunctionBegin;
2279   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2280   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2281   PetscFunctionReturn(0);
2282 }
2283 
2284 #undef __FUNCT__
2285 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2286 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2287 {
2288   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2289   PetscErrorCode ierr;
2290   PetscInt       i,*idxb = 0;
2291   PetscScalar    *va,*vb;
2292   Vec            vtmp;
2293 
2294   PetscFunctionBegin;
2295   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2296   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2297   if (idx) {
2298     for (i=0; i<A->rmap->n; i++) {
2299       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2300     }
2301   }
2302 
2303   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2304   if (idx) {
2305     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2306   }
2307   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2308   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2309 
2310   for (i=0; i<A->rmap->n; i++) {
2311     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2312       va[i] = vb[i];
2313       if (idx) idx[i] = a->garray[idxb[i]];
2314     }
2315   }
2316 
2317   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2318   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2319   ierr = PetscFree(idxb);CHKERRQ(ierr);
2320   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2321   PetscFunctionReturn(0);
2322 }
2323 
2324 #undef __FUNCT__
2325 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2326 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2327 {
2328   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2329   PetscErrorCode ierr;
2330   PetscInt       i,*idxb = 0;
2331   PetscScalar    *va,*vb;
2332   Vec            vtmp;
2333 
2334   PetscFunctionBegin;
2335   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2336   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2337   if (idx) {
2338     for (i=0; i<A->rmap->n; i++) {
2339       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2340     }
2341   }
2342 
2343   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2344   if (idx) {
2345     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2346   }
2347   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2348   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2349 
2350   for (i=0; i<A->rmap->n; i++) {
2351     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2352       va[i] = vb[i];
2353       if (idx) idx[i] = a->garray[idxb[i]];
2354     }
2355   }
2356 
2357   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2358   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2359   ierr = PetscFree(idxb);CHKERRQ(ierr);
2360   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2361   PetscFunctionReturn(0);
2362 }
2363 
2364 #undef __FUNCT__
2365 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2366 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2367 {
2368   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2369   PetscInt       n      = A->rmap->n;
2370   PetscInt       cstart = A->cmap->rstart;
2371   PetscInt       *cmap  = mat->garray;
2372   PetscInt       *diagIdx, *offdiagIdx;
2373   Vec            diagV, offdiagV;
2374   PetscScalar    *a, *diagA, *offdiagA;
2375   PetscInt       r;
2376   PetscErrorCode ierr;
2377 
2378   PetscFunctionBegin;
2379   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2380   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2381   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2382   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2383   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2384   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2385   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2386   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2387   for (r = 0; r < n; ++r) {
2388     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2389       a[r]   = diagA[r];
2390       idx[r] = cstart + diagIdx[r];
2391     } else {
2392       a[r]   = offdiagA[r];
2393       idx[r] = cmap[offdiagIdx[r]];
2394     }
2395   }
2396   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2397   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2398   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2399   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2400   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2401   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2402   PetscFunctionReturn(0);
2403 }
2404 
2405 #undef __FUNCT__
2406 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2407 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2408 {
2409   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2410   PetscInt       n      = A->rmap->n;
2411   PetscInt       cstart = A->cmap->rstart;
2412   PetscInt       *cmap  = mat->garray;
2413   PetscInt       *diagIdx, *offdiagIdx;
2414   Vec            diagV, offdiagV;
2415   PetscScalar    *a, *diagA, *offdiagA;
2416   PetscInt       r;
2417   PetscErrorCode ierr;
2418 
2419   PetscFunctionBegin;
2420   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2421   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2422   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2423   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2424   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2425   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2426   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2427   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2428   for (r = 0; r < n; ++r) {
2429     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2430       a[r]   = diagA[r];
2431       idx[r] = cstart + diagIdx[r];
2432     } else {
2433       a[r]   = offdiagA[r];
2434       idx[r] = cmap[offdiagIdx[r]];
2435     }
2436   }
2437   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2438   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2439   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2440   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2441   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2442   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2443   PetscFunctionReturn(0);
2444 }
2445 
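/*
   A minimal usage sketch shared by the row max/min routines above: v must be laid out like the
   rows of A (the "left" vector from MatCreateVecs()), and idx, when requested, receives the
   global column of the selected entry in each local row:

      Vec      v;
      PetscInt m,*idx;
      ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
      ierr = MatCreateVecs(A,NULL,&v);CHKERRQ(ierr);
      ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
      ierr = MatGetRowMax(A,v,idx);CHKERRQ(ierr);
      ierr = PetscFree(idx);CHKERRQ(ierr);
      ierr = VecDestroy(&v);CHKERRQ(ierr);
*/
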
2446 #undef __FUNCT__
2447 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2448 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2449 {
2450   PetscErrorCode ierr;
2451   Mat            *dummy;
2452 
2453   PetscFunctionBegin;
2454   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2455   *newmat = *dummy;
2456   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2457   PetscFunctionReturn(0);
2458 }
2459 
2460 #undef __FUNCT__
2461 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2462 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2463 {
2464   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2465   PetscErrorCode ierr;
2466 
2467   PetscFunctionBegin;
2468   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2469   PetscFunctionReturn(0);
2470 }
2471 
2472 #undef __FUNCT__
2473 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2474 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2475 {
2476   PetscErrorCode ierr;
2477   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2478 
2479   PetscFunctionBegin;
2480   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2481   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2482   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2483   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2484   PetscFunctionReturn(0);
2485 }
2486 
2487 #undef __FUNCT__
2488 #define __FUNCT__ "MatShift_MPIAIJ"
2489 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2490 {
2491   PetscErrorCode ierr;
2492   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2493   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2494 
2495   PetscFunctionBegin;
2496   if (!Y->preallocated) {
2497     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2498   } else if (!aij->nz) {
2499     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2500   }
2501   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2502   PetscFunctionReturn(0);
2503 }
2504 
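/*
   A minimal usage sketch for the shift above, which adds a*I to the matrix; the preallocation
   fix-ups in MatShift_MPIAIJ() make this safe even when the diagonal was never preallocated:

      ierr = MatShift(A,5.0);CHKERRQ(ierr);
*/
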
2505 /* -------------------------------------------------------------------*/
2506 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2507                                        MatGetRow_MPIAIJ,
2508                                        MatRestoreRow_MPIAIJ,
2509                                        MatMult_MPIAIJ,
2510                                 /* 4*/ MatMultAdd_MPIAIJ,
2511                                        MatMultTranspose_MPIAIJ,
2512                                        MatMultTransposeAdd_MPIAIJ,
2513                                        0,
2514                                        0,
2515                                        0,
2516                                 /*10*/ 0,
2517                                        0,
2518                                        0,
2519                                        MatSOR_MPIAIJ,
2520                                        MatTranspose_MPIAIJ,
2521                                 /*15*/ MatGetInfo_MPIAIJ,
2522                                        MatEqual_MPIAIJ,
2523                                        MatGetDiagonal_MPIAIJ,
2524                                        MatDiagonalScale_MPIAIJ,
2525                                        MatNorm_MPIAIJ,
2526                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2527                                        MatAssemblyEnd_MPIAIJ,
2528                                        MatSetOption_MPIAIJ,
2529                                        MatZeroEntries_MPIAIJ,
2530                                 /*24*/ MatZeroRows_MPIAIJ,
2531                                        0,
2532                                        0,
2533                                        0,
2534                                        0,
2535                                 /*29*/ MatSetUp_MPIAIJ,
2536                                        0,
2537                                        0,
2538                                        0,
2539                                        0,
2540                                 /*34*/ MatDuplicate_MPIAIJ,
2541                                        0,
2542                                        0,
2543                                        0,
2544                                        0,
2545                                 /*39*/ MatAXPY_MPIAIJ,
2546                                        MatGetSubMatrices_MPIAIJ,
2547                                        MatIncreaseOverlap_MPIAIJ,
2548                                        MatGetValues_MPIAIJ,
2549                                        MatCopy_MPIAIJ,
2550                                 /*44*/ MatGetRowMax_MPIAIJ,
2551                                        MatScale_MPIAIJ,
2552                                        MatShift_MPIAIJ,
2553                                        MatDiagonalSet_MPIAIJ,
2554                                        MatZeroRowsColumns_MPIAIJ,
2555                                 /*49*/ MatSetRandom_MPIAIJ,
2556                                        0,
2557                                        0,
2558                                        0,
2559                                        0,
2560                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2561                                        0,
2562                                        MatSetUnfactored_MPIAIJ,
2563                                        MatPermute_MPIAIJ,
2564                                        0,
2565                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2566                                        MatDestroy_MPIAIJ,
2567                                        MatView_MPIAIJ,
2568                                        0,
2569                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2570                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2571                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2572                                        0,
2573                                        0,
2574                                        0,
2575                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2576                                        MatGetRowMinAbs_MPIAIJ,
2577                                        0,
2578                                        MatSetColoring_MPIAIJ,
2579                                        0,
2580                                        MatSetValuesAdifor_MPIAIJ,
2581                                 /*75*/ MatFDColoringApply_AIJ,
2582                                        0,
2583                                        0,
2584                                        0,
2585                                        MatFindZeroDiagonals_MPIAIJ,
2586                                 /*80*/ 0,
2587                                        0,
2588                                        0,
2589                                 /*83*/ MatLoad_MPIAIJ,
2590                                        0,
2591                                        0,
2592                                        0,
2593                                        0,
2594                                        0,
2595                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2596                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2597                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2598                                        MatPtAP_MPIAIJ_MPIAIJ,
2599                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2600                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2601                                        0,
2602                                        0,
2603                                        0,
2604                                        0,
2605                                 /*99*/ 0,
2606                                        0,
2607                                        0,
2608                                        MatConjugate_MPIAIJ,
2609                                        0,
2610                                 /*104*/MatSetValuesRow_MPIAIJ,
2611                                        MatRealPart_MPIAIJ,
2612                                        MatImaginaryPart_MPIAIJ,
2613                                        0,
2614                                        0,
2615                                 /*109*/0,
2616                                        0,
2617                                        MatGetRowMin_MPIAIJ,
2618                                        0,
2619                                        0,
2620                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2621                                        0,
2622                                        MatGetGhosts_MPIAIJ,
2623                                        0,
2624                                        0,
2625                                 /*119*/0,
2626                                        0,
2627                                        0,
2628                                        0,
2629                                        MatGetMultiProcBlock_MPIAIJ,
2630                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2631                                        MatGetColumnNorms_MPIAIJ,
2632                                        MatInvertBlockDiagonal_MPIAIJ,
2633                                        0,
2634                                        MatGetSubMatricesMPI_MPIAIJ,
2635                                 /*129*/0,
2636                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2637                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2638                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2639                                        0,
2640                                 /*134*/0,
2641                                        0,
2642                                        0,
2643                                        0,
2644                                        0,
2645                                 /*139*/0,
2646                                        0,
2647                                        0,
2648                                        MatFDColoringSetUp_MPIXAIJ,
2649                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2650                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2651 };
2652 
2653 /* ----------------------------------------------------------------------------------------*/
2654 
2655 #undef __FUNCT__
2656 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2657 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2658 {
2659   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2660   PetscErrorCode ierr;
2661 
2662   PetscFunctionBegin;
2663   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2664   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2665   PetscFunctionReturn(0);
2666 }
2667 
2668 #undef __FUNCT__
2669 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2670 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2671 {
2672   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2673   PetscErrorCode ierr;
2674 
2675   PetscFunctionBegin;
2676   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2677   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2678   PetscFunctionReturn(0);
2679 }
2680 
2681 #undef __FUNCT__
2682 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2683 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2684 {
2685   Mat_MPIAIJ     *b;
2686   PetscErrorCode ierr;
2687 
2688   PetscFunctionBegin;
2689   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2690   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2691   b = (Mat_MPIAIJ*)B->data;
2692 
2693   if (!B->preallocated) {
2694     /* Explicitly create 2 MATSEQAIJ matrices. */
2695     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2696     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2697     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2698     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2699     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2700     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2701     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2702     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2703     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2704     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2705   }
2706 
2707   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2708   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2709   B->preallocated = PETSC_TRUE;
2710   PetscFunctionReturn(0);
2711 }
2712 
2713 #undef __FUNCT__
2714 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2715 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2716 {
2717   Mat            mat;
2718   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2719   PetscErrorCode ierr;
2720 
2721   PetscFunctionBegin;
2722   *newmat = 0;
2723   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2724   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2725   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2726   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2727   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2728   a       = (Mat_MPIAIJ*)mat->data;
2729 
2730   mat->factortype   = matin->factortype;
2731   mat->assembled    = PETSC_TRUE;
2732   mat->insertmode   = NOT_SET_VALUES;
2733   mat->preallocated = PETSC_TRUE;
2734 
2735   a->size         = oldmat->size;
2736   a->rank         = oldmat->rank;
2737   a->donotstash   = oldmat->donotstash;
2738   a->roworiented  = oldmat->roworiented;
2739   a->rowindices   = 0;
2740   a->rowvalues    = 0;
2741   a->getrowactive = PETSC_FALSE;
2742 
2743   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2744   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2745 
2746   if (oldmat->colmap) {
2747 #if defined(PETSC_USE_CTABLE)
2748     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2749 #else
2750     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2751     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2752     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2753 #endif
2754   } else a->colmap = 0;
2755   if (oldmat->garray) {
2756     PetscInt len;
2757     len  = oldmat->B->cmap->n;
2758     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2759     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2760     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2761   } else a->garray = 0;
2762 
2763   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2764   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2765   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2766   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2767   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2768   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2769   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2770   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2771   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2772   *newmat = mat;
2773   PetscFunctionReturn(0);
2774 }
2775 
2776 
2777 
2778 #undef __FUNCT__
2779 #define __FUNCT__ "MatLoad_MPIAIJ"
2780 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2781 {
2782   PetscScalar    *vals,*svals;
2783   MPI_Comm       comm;
2784   PetscErrorCode ierr;
2785   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2786   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2787   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2788   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2789   PetscInt       cend,cstart,n,*rowners;
2790   int            fd;
2791   PetscInt       bs = newMat->rmap->bs;
2792 
2793   PetscFunctionBegin;
2794   /* force binary viewer to load .info file if it has not yet done so */
2795   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2796   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2797   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2798   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2799   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2800   if (!rank) {
2801     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2802     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2803   }
2804 
2805   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
2806   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2807   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2808   if (bs < 0) bs = 1;
2809 
2810   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2811   M    = header[1]; N = header[2];
2812 
2813   /* If global sizes are set, check if they are consistent with that given in the file */
2814   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2815   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2816 
2817   /* determine ownership of all (block) rows */
2818   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2819   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2820   else m = newMat->rmap->n; /* Set by user */
2821 
2822   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2823   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2824 
2825   /* First process needs enough room for process with most rows */
2826   if (!rank) {
2827     mmax = rowners[1];
2828     for (i=2; i<=size; i++) {
2829       mmax = PetscMax(mmax, rowners[i]);
2830     }
2831   } else mmax = -1;             /* unused, but compilers complain */
2832 
2833   rowners[0] = 0;
2834   for (i=2; i<=size; i++) {
2835     rowners[i] += rowners[i-1];
2836   }
2837   rstart = rowners[rank];
2838   rend   = rowners[rank+1];
2839 
2840   /* distribute row lengths to all processors */
2841   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2842   if (!rank) {
2843     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2844     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2845     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2846     for (j=0; j<m; j++) {
2847       procsnz[0] += ourlens[j];
2848     }
2849     for (i=1; i<size; i++) {
2850       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2851       /* calculate the number of nonzeros on each processor */
2852       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2853         procsnz[i] += rowlengths[j];
2854       }
2855       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2856     }
2857     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2858   } else {
2859     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2860   }
2861 
2862   if (!rank) {
2863     /* determine max buffer needed and allocate it */
2864     maxnz = 0;
2865     for (i=0; i<size; i++) {
2866       maxnz = PetscMax(maxnz,procsnz[i]);
2867     }
2868     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2869 
2870     /* read in my part of the matrix column indices  */
2871     nz   = procsnz[0];
2872     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2873     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2874 
2875     /* read in everyone else's part and ship it off */
2876     for (i=1; i<size; i++) {
2877       nz   = procsnz[i];
2878       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2879       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2880     }
2881     ierr = PetscFree(cols);CHKERRQ(ierr);
2882   } else {
2883     /* determine buffer space needed for message */
2884     nz = 0;
2885     for (i=0; i<m; i++) {
2886       nz += ourlens[i];
2887     }
2888     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2889 
2890     /* receive message of column indices*/
2891     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2892   }
2893 
2894   /* determine column ownership if matrix is not square */
2895   if (N != M) {
2896     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2897     else n = newMat->cmap->n;
2898     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2899     cstart = cend - n;
2900   } else {
2901     cstart = rstart;
2902     cend   = rend;
2903     n      = cend - cstart;
2904   }
2905 
2906   /* loop over local rows, determining number of off diagonal entries */
2907   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2908   jj   = 0;
2909   for (i=0; i<m; i++) {
2910     for (j=0; j<ourlens[i]; j++) {
2911       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2912       jj++;
2913     }
2914   }
2915 
2916   for (i=0; i<m; i++) {
2917     ourlens[i] -= offlens[i];
2918   }
2919   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2920 
2921   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2922 
2923   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2924 
2925   for (i=0; i<m; i++) {
2926     ourlens[i] += offlens[i];
2927   }
2928 
2929   if (!rank) {
2930     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2931 
2932     /* read in my part of the matrix numerical values  */
2933     nz   = procsnz[0];
2934     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2935 
2936     /* insert into matrix */
2937     jj      = rstart;
2938     smycols = mycols;
2939     svals   = vals;
2940     for (i=0; i<m; i++) {
2941       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2942       smycols += ourlens[i];
2943       svals   += ourlens[i];
2944       jj++;
2945     }
2946 
2947     /* read in other processors and ship out */
2948     for (i=1; i<size; i++) {
2949       nz   = procsnz[i];
2950       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2951       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2952     }
2953     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2954   } else {
2955     /* receive numeric values */
2956     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2957 
2958     /* receive message of values*/
2959     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2960 
2961     /* insert into matrix */
2962     jj      = rstart;
2963     smycols = mycols;
2964     svals   = vals;
2965     for (i=0; i<m; i++) {
2966       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2967       smycols += ourlens[i];
2968       svals   += ourlens[i];
2969       jj++;
2970     }
2971   }
2972   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2973   ierr = PetscFree(vals);CHKERRQ(ierr);
2974   ierr = PetscFree(mycols);CHKERRQ(ierr);
2975   ierr = PetscFree(rowners);CHKERRQ(ierr);
2976   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2977   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2978   PetscFunctionReturn(0);
2979 }
2980 
2981 #undef __FUNCT__
2982 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2983 /* TODO: Not scalable because of ISAllGather(). */
2984 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2985 {
2986   PetscErrorCode ierr;
2987   IS             iscol_local;
2988   PetscInt       csize;
2989 
2990   PetscFunctionBegin;
2991   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2992   if (call == MAT_REUSE_MATRIX) {
2993     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2994     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2995   } else {
2996     PetscInt cbs;
2997     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
2998     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2999     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3000   }
3001   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3002   if (call == MAT_INITIAL_MATRIX) {
3003     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3004     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3005   }
3006   PetscFunctionReturn(0);
3007 }
3008 
3009 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3010 #undef __FUNCT__
3011 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3012 /*
3013     Not great since it makes two copies of the submatrix: first a SeqAIJ
3014   on each process, and then the final result by concatenating the local matrices.
3015   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3016 
3017   Note: This requires a sequential iscol with all indices.
3018 */
3019 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3020 {
3021   PetscErrorCode ierr;
3022   PetscMPIInt    rank,size;
3023   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3024   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3025   PetscBool      allcolumns, colflag;
3026   Mat            M,Mreuse;
3027   MatScalar      *vwork,*aa;
3028   MPI_Comm       comm;
3029   Mat_SeqAIJ     *aij;
3030 
3031   PetscFunctionBegin;
3032   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3033   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3034   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3035 
3036   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3037   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3038   if (colflag && ncol == mat->cmap->N) {
3039     allcolumns = PETSC_TRUE;
3040   } else {
3041     allcolumns = PETSC_FALSE;
3042   }
3043   if (call ==  MAT_REUSE_MATRIX) {
3044     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3045     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3046     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3047   } else {
3048     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3049   }
3050 
3051   /*
3052       m - number of local rows
3053       n - number of columns (same on all processors)
3054       rstart - first row in new global matrix generated
3055   */
3056   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3057   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3058   if (call == MAT_INITIAL_MATRIX) {
3059     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3060     ii  = aij->i;
3061     jj  = aij->j;
3062 
3063     /*
3064         Determine the number of non-zeros in the diagonal and off-diagonal
3065         portions of the matrix in order to do correct preallocation
3066     */
3067 
3068     /* first get start and end of "diagonal" columns */
3069     if (csize == PETSC_DECIDE) {
3070       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3071       if (mglobal == n) { /* square matrix */
3072         nlocal = m;
3073       } else {
3074         nlocal = n/size + ((n % size) > rank);
3075       }
3076     } else {
3077       nlocal = csize;
3078     }
3079     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3080     rstart = rend - nlocal;
3081     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3082 
3083     /* next, compute all the lengths */
3084     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3085     olens = dlens + m;
3086     for (i=0; i<m; i++) {
3087       jend = ii[i+1] - ii[i];
3088       olen = 0;
3089       dlen = 0;
3090       for (j=0; j<jend; j++) {
3091         if (*jj < rstart || *jj >= rend) olen++;
3092         else dlen++;
3093         jj++;
3094       }
3095       olens[i] = olen;
3096       dlens[i] = dlen;
3097     }
3098     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3099     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3100     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3101     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3102     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3103     ierr = PetscFree(dlens);CHKERRQ(ierr);
3104   } else {
3105     PetscInt ml,nl;
3106 
3107     M    = *newmat;
3108     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3109     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3110     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3111     /*
3112          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3113        rather than the slower MatSetValues().
3114     */
3115     M->was_assembled = PETSC_TRUE;
3116     M->assembled     = PETSC_FALSE;
3117   }
3118   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3119   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3120   ii   = aij->i;
3121   jj   = aij->j;
3122   aa   = aij->a;
3123   for (i=0; i<m; i++) {
3124     row   = rstart + i;
3125     nz    = ii[i+1] - ii[i];
3126     cwork = jj;     jj += nz;
3127     vwork = aa;     aa += nz;
3128     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3129   }
3130 
3131   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3132   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3133   *newmat = M;
3134 
3135   /* save submatrix used in processor for next request */
3136   if (call ==  MAT_INITIAL_MATRIX) {
3137     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3138     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3139   }
3140   PetscFunctionReturn(0);
3141 }
3142 
3143 #undef __FUNCT__
3144 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3145 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3146 {
3147   PetscInt       m,cstart, cend,j,nnz,i,d;
3148   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3149   const PetscInt *JJ;
3150   PetscScalar    *values;
3151   PetscErrorCode ierr;
3152 
3153   PetscFunctionBegin;
3154   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3155 
3156   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3157   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3158   m      = B->rmap->n;
3159   cstart = B->cmap->rstart;
3160   cend   = B->cmap->rend;
3161   rstart = B->rmap->rstart;
3162 
3163   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3164 
3165 #if defined(PETSC_USE_DEBUG)
3166   for (i=0; i<m; i++) {
3167     nnz = Ii[i+1]- Ii[i];
3168     JJ  = J + Ii[i];
3169     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3170     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3171     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3172   }
3173 #endif
3174 
3175   for (i=0; i<m; i++) {
3176     nnz     = Ii[i+1]- Ii[i];
3177     JJ      = J + Ii[i];
3178     nnz_max = PetscMax(nnz_max,nnz);
3179     d       = 0;
3180     for (j=0; j<nnz; j++) {
3181       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3182     }
3183     d_nnz[i] = d;
3184     o_nnz[i] = nnz - d;
3185   }
3186   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3187   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3188 
3189   if (v) values = (PetscScalar*)v;
3190   else {
3191     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3192   }
3193 
3194   for (i=0; i<m; i++) {
3195     ii   = i + rstart;
3196     nnz  = Ii[i+1]- Ii[i];
3197     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3198   }
3199   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3200   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3201 
3202   if (!v) {
3203     ierr = PetscFree(values);CHKERRQ(ierr);
3204   }
3205   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3206   PetscFunctionReturn(0);
3207 }
3208 
3209 #undef __FUNCT__
3210 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3211 /*@
3212    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3213    (the default parallel PETSc format).
3214 
3215    Collective on MPI_Comm
3216 
3217    Input Parameters:
3218 +  B - the matrix
3219 .  i - the indices into j for the start of each local row (starts with zero)
3220 .  j - the column indices for each local row (starts with zero)
3221 -  v - optional values in the matrix
3222 
3223    Level: developer
3224 
3225    Notes:
3226        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3227      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3228      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3229 
3230        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3231 
3232        The format used for the sparse matrix input is equivalent to a
3233     row-major ordering, i.e. for the following matrix the input data expected is
3234     as shown
3235 
3236 $        1 0 0
3237 $        2 0 3     P0
3238 $       -------
3239 $        4 5 6     P1
3240 $
3241 $     Process0 [P0]: rows_owned=[0,1]
3242 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3243 $        j =  {0,0,2}  [size = 3]
3244 $        v =  {1,2,3}  [size = 3]
3245 $
3246 $     Process1 [P1]: rows_owned=[2]
3247 $        i =  {0,3}    [size = nrow+1  = 1+1]
3248 $        j =  {0,1,2}  [size = 3]
3249 $        v =  {4,5,6}  [size = 3]
3250 
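   A minimal calling sketch for process P0 above (an illustration, not taken from an
   example program: it assumes B has already been created with MatCreate(), sized with
   MatSetSizes(), and given type MATMPIAIJ, and that every process makes the matching
   collective call with its own local arrays):

.vb
   PetscInt    i[] = {0,1,3};        /* row pointers for the 2 local rows */
   PetscInt    j[] = {0,0,2};        /* global column indices */
   PetscScalar v[] = {1.0,2.0,3.0};  /* values; may also be passed as NULL */

   ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
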
3251 .keywords: matrix, aij, compressed row, sparse, parallel
3252 
3253 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3254           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3255 @*/
3256 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3257 {
3258   PetscErrorCode ierr;
3259 
3260   PetscFunctionBegin;
3261   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3262   PetscFunctionReturn(0);
3263 }
3264 
3265 #undef __FUNCT__
3266 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3267 /*@C
3268    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3269    (the default parallel PETSc format).  For good matrix assembly performance
3270    the user should preallocate the matrix storage by setting the parameters
3271    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3272    performance can be increased by more than a factor of 50.
3273 
3274    Collective on MPI_Comm
3275 
3276    Input Parameters:
3277 +  B - the matrix
3278 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3279            (same value is used for all local rows)
3280 .  d_nnz - array containing the number of nonzeros in the various rows of the
3281            DIAGONAL portion of the local submatrix (possibly different for each row)
3282            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3283            The size of this array is equal to the number of local rows, i.e 'm'.
3284            For matrices that will be factored, you must leave room for (and set)
3285            the diagonal entry even if it is zero.
3286 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3287            submatrix (same value is used for all local rows).
3288 -  o_nnz - array containing the number of nonzeros in the various rows of the
3289            OFF-DIAGONAL portion of the local submatrix (possibly different for
3290            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3291            structure. The size of this array is equal to the number
3292            of local rows, i.e 'm'.
3293 
3294    If the *_nnz parameter is given then the *_nz parameter is ignored
3295 
3296    The AIJ format (also called the Yale sparse matrix format or
3297    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3298    storage.  The stored row and column indices begin with zero.
3299    See Users-Manual: ch_mat for details.
3300 
3301    The parallel matrix is partitioned such that the first m0 rows belong to
3302    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3303    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3304 
3305    The DIAGONAL portion of the local submatrix of a processor can be defined
3306    as the submatrix which is obtained by extracting the part corresponding to
3307    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3308    first row that belongs to the processor, r2 is the last row belonging to
3309    this processor, and c1-c2 is the range of indices of the local part of a
3310    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3311    common case of a square matrix, the row and column ranges are the same and
3312    the DIAGONAL part is also square. The remaining portion of the local
3313    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3314 
3315    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3316 
3317    You can call MatGetInfo() to get information on how effective the preallocation was;
3318    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3319    You can also run with the option -info and look for messages with the string
3320    malloc in them to see if additional memory allocation was needed.
3321 
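   A minimal sketch of such a check (the variable names are only for illustration) might be:

.vb
   MatInfo info;

   ierr = MatGetInfo(A,MAT_LOCAL,&info);CHKERRQ(ierr);
   ierr = PetscPrintf(PETSC_COMM_SELF,"mallocs %g nz_allocated %g nz_used %g nz_unneeded %g\n",
                      info.mallocs,info.nz_allocated,info.nz_used,info.nz_unneeded);CHKERRQ(ierr);
.ve
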
3322    Example usage:
3323 
3324    Consider the following 8x8 matrix with 34 non-zero values, that is
3325    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3326    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3327    as follows:
3328 
3329 .vb
3330             1  2  0  |  0  3  0  |  0  4
3331     Proc0   0  5  6  |  7  0  0  |  8  0
3332             9  0 10  | 11  0  0  | 12  0
3333     -------------------------------------
3334            13  0 14  | 15 16 17  |  0  0
3335     Proc1   0 18  0  | 19 20 21  |  0  0
3336             0  0  0  | 22 23  0  | 24  0
3337     -------------------------------------
3338     Proc2  25 26 27  |  0  0 28  | 29  0
3339            30  0  0  | 31 32 33  |  0 34
3340 .ve
3341 
3342    This can be represented as a collection of submatrices as:
3343 
3344 .vb
3345       A B C
3346       D E F
3347       G H I
3348 .ve
3349 
3350    Where the submatrices A,B,C are owned by proc0, D,E,F are
3351    owned by proc1, G,H,I are owned by proc2.
3352 
3353    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3354    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3355    The 'M','N' parameters are 8,8, and have the same values on all procs.
3356 
3357    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3358    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3359    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3360    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3361    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3362    matrix, and [DF] as another SeqAIJ matrix.
3363 
3364    When d_nz, o_nz parameters are specified, d_nz storage elements are
3365    allocated for every row of the local diagonal submatrix, and o_nz
3366    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3367    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3368    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3369    In this case, the values of d_nz,o_nz are:
3370 .vb
3371      proc0 : dnz = 2, o_nz = 2
3372      proc1 : dnz = 3, o_nz = 2
3373      proc2 : dnz = 1, o_nz = 4
3374 .ve
3375    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3376    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3377    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3378    34 values.
3379 
3380    When d_nnz, o_nnz parameters are specified, the storage is specified
3381    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3382    In the above case the values for d_nnz,o_nnz are:
3383 .vb
3384      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3385      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3386      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3387 .ve
3388    Here the space allocated is the sum of all the above values, i.e. 34, and
3389    hence the preallocation is perfect.
3390 
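   As an illustrative sketch (not from an actual example program), the matching call on
   proc1 for the matrix above, letting the d_nnz/o_nnz arrays override d_nz/o_nz, could be:

.vb
   PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};   /* per-row counts on proc1 */

   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
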
3391    Level: intermediate
3392 
3393 .keywords: matrix, aij, compressed row, sparse, parallel
3394 
3395 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3396           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3397 @*/
3398 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3399 {
3400   PetscErrorCode ierr;
3401 
3402   PetscFunctionBegin;
3403   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3404   PetscValidType(B,1);
3405   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3406   PetscFunctionReturn(0);
3407 }
3408 
3409 #undef __FUNCT__
3410 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3411 /*@
3412      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3413          CSR format the local rows.
3414 
3415    Collective on MPI_Comm
3416 
3417    Input Parameters:
3418 +  comm - MPI communicator
3419 .  m - number of local rows (Cannot be PETSC_DECIDE)
3420 .  n - This value should be the same as the local size used in creating the
3421        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3422        calculated if N is given) For square matrices n is almost always m.
3423 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3424 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3425 .   i - row indices
3426 .   j - column indices
3427 -   a - matrix values
3428 
3429    Output Parameter:
3430 .   mat - the matrix
3431 
3432    Level: intermediate
3433 
3434    Notes:
3435        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3436      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3437      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3438 
3439        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3440 
3441        The format used for the sparse matrix input is equivalent to a
3442     row-major ordering, i.e. for the following matrix the input data expected is
3443     as shown
3444 
3445 $        1 0 0
3446 $        2 0 3     P0
3447 $       -------
3448 $        4 5 6     P1
3449 $
3450 $     Process0 [P0]: rows_owned=[0,1]
3451 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3452 $        j =  {0,0,2}  [size = 3]
3453 $        v =  {1,2,3}  [size = 3]
3454 $
3455 $     Process1 [P1]: rows_owned=[2]
3456 $        i =  {0,3}    [size = nrow+1  = 1+1]
3457 $        j =  {0,1,2}  [size = 3]
3458 $        v =  {4,5,6}  [size = 3]
3459 
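   A minimal sketch of the corresponding call on process P0 (an illustration only: process P1
   would make the same collective call with m = 1 and its own i, j, v arrays, and the
   communicator and variable names are assumptions of this sketch):

.vb
   PetscInt    i[] = {0,1,3};
   PetscInt    j[] = {0,0,2};
   PetscScalar v[] = {1.0,2.0,3.0};
   Mat         A;

   ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
.ve
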
3460 .keywords: matrix, aij, compressed row, sparse, parallel
3461 
3462 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3463           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3464 @*/
3465 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3466 {
3467   PetscErrorCode ierr;
3468 
3469   PetscFunctionBegin;
3470   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3471   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3472   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3473   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3474   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3475   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3476   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3477   PetscFunctionReturn(0);
3478 }
3479 
3480 #undef __FUNCT__
3481 #define __FUNCT__ "MatCreateAIJ"
3482 /*@C
3483    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3484    (the default parallel PETSc format).  For good matrix assembly performance
3485    the user should preallocate the matrix storage by setting the parameters
3486    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3487    performance can be increased by more than a factor of 50.
3488 
3489    Collective on MPI_Comm
3490 
3491    Input Parameters:
3492 +  comm - MPI communicator
3493 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3494            This value should be the same as the local size used in creating the
3495            y vector for the matrix-vector product y = Ax.
3496 .  n - This value should be the same as the local size used in creating the
3497        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3498        calculated if N is given) For square matrices n is almost always m.
3499 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3500 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3501 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3502            (same value is used for all local rows)
3503 .  d_nnz - array containing the number of nonzeros in the various rows of the
3504            DIAGONAL portion of the local submatrix (possibly different for each row)
3505            or NULL, if d_nz is used to specify the nonzero structure.
3506            The size of this array is equal to the number of local rows, i.e 'm'.
3507 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3508            submatrix (same value is used for all local rows).
3509 -  o_nnz - array containing the number of nonzeros in the various rows of the
3510            OFF-DIAGONAL portion of the local submatrix (possibly different for
3511            each row) or NULL, if o_nz is used to specify the nonzero
3512            structure. The size of this array is equal to the number
3513            of local rows, i.e 'm'.
3514 
3515    Output Parameter:
3516 .  A - the matrix
3517 
3518    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3519    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3520    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3521 
3522    Notes:
3523    If the *_nnz parameter is given then the *_nz parameter is ignored
3524 
3525    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3526    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3527    storage requirements for this matrix.
3528 
3529    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3530    processor then it must be used on all processors that share the object for
3531    that argument.
3532 
3533    The user MUST specify either the local or global matrix dimensions
3534    (possibly both).
3535 
3536    The parallel matrix is partitioned across processors such that the
3537    first m0 rows belong to process 0, the next m1 rows belong to
3538    process 1, the next m2 rows belong to process 2 etc.. where
3539    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3540    values corresponding to [m x N] submatrix.
3541 
3542    The columns are logically partitioned with the n0 columns belonging
3543    to 0th partition, the next n1 columns belonging to the next
3544    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3545 
3546    The DIAGONAL portion of the local submatrix on any given processor
3547    is the submatrix corresponding to the rows and columns m,n
3548    corresponding to the given processor. i.e diagonal matrix on
3549    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3550    etc. The remaining portion of the local submatrix [m x (N-n)]
3551    constitutes the OFF-DIAGONAL portion. The example below better
3552    illustrates this concept.
3553 
3554    For a square global matrix we define each processor's diagonal portion
3555    to be its local rows and the corresponding columns (a square submatrix);
3556    each processor's off-diagonal portion encompasses the remainder of the
3557    local matrix (a rectangular submatrix).
3558 
3559    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3560 
3561    When calling this routine with a single process communicator, a matrix of
3562    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3563    type of communicator, use the construction mechanism:
3564      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3565 
3566    By default, this format uses inodes (identical nodes) when possible.
3567    We search for consecutive rows with the same nonzero structure, thereby
3568    reusing matrix information to achieve increased efficiency.
3569 
3570    Options Database Keys:
3571 +  -mat_no_inode  - Do not use inodes
3572 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3573 -  -mat_aij_oneindex - Internally use indexing starting at 1
3574         rather than 0.  Note that when calling MatSetValues(),
3575         the user still MUST index entries starting at 0!
3576 
3577 
3578    Example usage:
3579 
3580    Consider the following 8x8 matrix with 34 non-zero values, that is
3581    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3582    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3583    as follows:
3584 
3585 .vb
3586             1  2  0  |  0  3  0  |  0  4
3587     Proc0   0  5  6  |  7  0  0  |  8  0
3588             9  0 10  | 11  0  0  | 12  0
3589     -------------------------------------
3590            13  0 14  | 15 16 17  |  0  0
3591     Proc1   0 18  0  | 19 20 21  |  0  0
3592             0  0  0  | 22 23  0  | 24  0
3593     -------------------------------------
3594     Proc2  25 26 27  |  0  0 28  | 29  0
3595            30  0  0  | 31 32 33  |  0 34
3596 .ve
3597 
3598    This can be represented as a collection of submatrices as:
3599 
3600 .vb
3601       A B C
3602       D E F
3603       G H I
3604 .ve
3605 
3606    Where the submatrices A,B,C are owned by proc0, D,E,F are
3607    owned by proc1, G,H,I are owned by proc2.
3608 
3609    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3610    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3611    The 'M','N' parameters are 8,8, and have the same values on all procs.
3612 
3613    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3614    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3615    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3616    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3617    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3618    matrix, and [DF] as another SeqAIJ matrix.
3619 
3620    When d_nz, o_nz parameters are specified, d_nz storage elements are
3621    allocated for every row of the local diagonal submatrix, and o_nz
3622    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3623    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3624    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3625    In this case, the values of d_nz,o_nz are:
3626 .vb
3627      proc0 : dnz = 2, o_nz = 2
3628      proc1 : dnz = 3, o_nz = 2
3629      proc2 : dnz = 1, o_nz = 4
3630 .ve
3631    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3632    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3633    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3634    34 values.
3635 
3636    When d_nnz, o_nnz parameters are specified, the storage is specified
3637    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3638    In the above case the values for d_nnz,o_nnz are:
3639 .vb
3640      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3641      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3642      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3643 .ve
3644    Here the space allocated is the sum of all the above values, i.e. 34, and
3645    hence the preallocation is perfect.
3646 
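   An illustrative sketch of the corresponding call on proc0, using the d_nnz/o_nnz variant
   (the other processes make the same collective call with their own local sizes and count
   arrays; the communicator and variable names are assumptions of this sketch):

.vb
   PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};   /* per-row counts on proc0 */
   Mat      A;

   ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
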
3647    Level: intermediate
3648 
3649 .keywords: matrix, aij, compressed row, sparse, parallel
3650 
3651 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3652           MPIAIJ, MatCreateMPIAIJWithArrays()
3653 @*/
3654 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3655 {
3656   PetscErrorCode ierr;
3657   PetscMPIInt    size;
3658 
3659   PetscFunctionBegin;
3660   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3661   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3662   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3663   if (size > 1) {
3664     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3665     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3666   } else {
3667     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3668     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3669   }
3670   PetscFunctionReturn(0);
3671 }
3672 
3673 #undef __FUNCT__
3674 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3675 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3676 {
3677   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3678 
3679   PetscFunctionBegin;
3680   if (Ad)     *Ad     = a->A;
3681   if (Ao)     *Ao     = a->B;
3682   if (colmap) *colmap = a->garray;
3683   PetscFunctionReturn(0);
3684 }
3685 
3686 #undef __FUNCT__
3687 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3688 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3689 {
3690   PetscErrorCode ierr;
3691   PetscInt       i;
3692   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3693 
3694   PetscFunctionBegin;
3695   if (coloring->ctype == IS_COLORING_GLOBAL) {
3696     ISColoringValue *allcolors,*colors;
3697     ISColoring      ocoloring;
3698 
3699     /* set coloring for diagonal portion */
3700     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3701 
3702     /* set coloring for off-diagonal portion */
3703     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
3704     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3705     for (i=0; i<a->B->cmap->n; i++) {
3706       colors[i] = allcolors[a->garray[i]];
3707     }
3708     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3709     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3710     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3711     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3712   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3713     ISColoringValue *colors;
3714     PetscInt        *larray;
3715     ISColoring      ocoloring;
3716 
3717     /* set coloring for diagonal portion */
3718     ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr);
3719     for (i=0; i<a->A->cmap->n; i++) {
3720       larray[i] = i + A->cmap->rstart;
3721     }
3722     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
3723     ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr);
3724     for (i=0; i<a->A->cmap->n; i++) {
3725       colors[i] = coloring->colors[larray[i]];
3726     }
3727     ierr = PetscFree(larray);CHKERRQ(ierr);
3728     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3729     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3730     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3731 
3732     /* set coloring for off-diagonal portion */
3733     ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr);
3734     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
3735     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3736     for (i=0; i<a->B->cmap->n; i++) {
3737       colors[i] = coloring->colors[larray[i]];
3738     }
3739     ierr = PetscFree(larray);CHKERRQ(ierr);
3740     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3741     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3742     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3743   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3744   PetscFunctionReturn(0);
3745 }
3746 
3747 #undef __FUNCT__
3748 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3749 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3750 {
3751   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3752   PetscErrorCode ierr;
3753 
3754   PetscFunctionBegin;
3755   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3756   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3757   PetscFunctionReturn(0);
3758 }
3759 
3760 #undef __FUNCT__
3761 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
3762 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3763 {
3764   PetscErrorCode ierr;
3765   PetscInt       m,N,i,rstart,nnz,Ii;
3766   PetscInt       *indx;
3767   PetscScalar    *values;
3768 
3769   PetscFunctionBegin;
3770   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3771   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3772     PetscInt       *dnz,*onz,sum,bs,cbs;
3773 
3774     if (n == PETSC_DECIDE) {
3775       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3776     }
3777     /* Check sum(n) = N */
3778     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3779     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",N);
3780 
3781     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3782     rstart -= m;
3783 
3784     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3785     for (i=0; i<m; i++) {
3786       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3787       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3788       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3789     }
3790 
3791     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3792     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3793     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3794     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3795     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3796     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3797     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3798   }
3799 
3800   /* numeric phase */
3801   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3802   for (i=0; i<m; i++) {
3803     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3804     Ii   = i + rstart;
3805     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3806     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3807   }
3808   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3809   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3810   PetscFunctionReturn(0);
3811 }
3812 
3813 #undef __FUNCT__
3814 #define __FUNCT__ "MatFileSplit"
3815 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3816 {
3817   PetscErrorCode    ierr;
3818   PetscMPIInt       rank;
3819   PetscInt          m,N,i,rstart,nnz;
3820   size_t            len;
3821   const PetscInt    *indx;
3822   PetscViewer       out;
3823   char              *name;
3824   Mat               B;
3825   const PetscScalar *values;
3826 
3827   PetscFunctionBegin;
3828   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3829   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3830   /* Should this be the type of the diagonal block of A? */
3831   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3832   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3833   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3834   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3835   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3836   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3837   for (i=0; i<m; i++) {
3838     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3839     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3840     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3841   }
3842   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3843   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3844 
3845   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3846   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3847   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
3848   sprintf(name,"%s.%d",outfile,rank);
3849   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3850   ierr = PetscFree(name);CHKERRQ(ierr);
3851   ierr = MatView(B,out);CHKERRQ(ierr);
3852   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3853   ierr = MatDestroy(&B);CHKERRQ(ierr);
3854   PetscFunctionReturn(0);
3855 }
3856 
3857 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3858 #undef __FUNCT__
3859 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3860 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3861 {
3862   PetscErrorCode      ierr;
3863   Mat_Merge_SeqsToMPI *merge;
3864   PetscContainer      container;
3865 
3866   PetscFunctionBegin;
3867   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3868   if (container) {
3869     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3870     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3871     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3872     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3873     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3874     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3875     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3876     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3877     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3878     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3879     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3880     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3881     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3882     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3883     ierr = PetscFree(merge);CHKERRQ(ierr);
3884     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3885   }
3886   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3887   PetscFunctionReturn(0);
3888 }
3889 
3890 #include <../src/mat/utils/freespace.h>
3891 #include <petscbt.h>
3892 
3893 #undef __FUNCT__
3894 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
3895 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
3896 {
3897   PetscErrorCode      ierr;
3898   MPI_Comm            comm;
3899   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
3900   PetscMPIInt         size,rank,taga,*len_s;
3901   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
3902   PetscInt            proc,m;
3903   PetscInt            **buf_ri,**buf_rj;
3904   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3905   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
3906   MPI_Request         *s_waits,*r_waits;
3907   MPI_Status          *status;
3908   MatScalar           *aa=a->a;
3909   MatScalar           **abuf_r,*ba_i;
3910   Mat_Merge_SeqsToMPI *merge;
3911   PetscContainer      container;
3912 
3913   PetscFunctionBegin;
3914   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
3915   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3916 
3917   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3918   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3919 
3920   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3921   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3922 
3923   bi     = merge->bi;
3924   bj     = merge->bj;
3925   buf_ri = merge->buf_ri;
3926   buf_rj = merge->buf_rj;
3927 
3928   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
3929   owners = merge->rowmap->range;
3930   len_s  = merge->len_s;
3931 
3932   /* send and recv matrix values */
3933   /*-----------------------------*/
3934   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3935   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3936 
3937   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
3938   for (proc=0,k=0; proc<size; proc++) {
3939     if (!len_s[proc]) continue;
3940     i    = owners[proc];
3941     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3942     k++;
3943   }
3944 
3945   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3946   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3947   ierr = PetscFree(status);CHKERRQ(ierr);
3948 
3949   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3950   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3951 
3952   /* insert mat values of mpimat */
3953   /*----------------------------*/
3954   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
3955   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
3956 
3957   for (k=0; k<merge->nrecv; k++) {
3958     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3959     nrows       = *(buf_ri_k[k]);
3960     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
3961     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
3962   }
3963 
3964   /* set values of ba */
3965   m = merge->rowmap->n;
3966   for (i=0; i<m; i++) {
3967     arow = owners[rank] + i;
3968     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3969     bnzi = bi[i+1] - bi[i];
3970     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
3971 
3972     /* add local non-zero vals of this proc's seqmat into ba */
3973     anzi   = ai[arow+1] - ai[arow];
3974     aj     = a->j + ai[arow];
3975     aa     = a->a + ai[arow];
3976     nextaj = 0;
3977     for (j=0; nextaj<anzi; j++) {
3978       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
3979         ba_i[j] += aa[nextaj++];
3980       }
3981     }
3982 
3983     /* add received vals into ba */
3984     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
3985       /* i-th row */
3986       if (i == *nextrow[k]) {
3987         anzi   = *(nextai[k]+1) - *nextai[k];
3988         aj     = buf_rj[k] + *(nextai[k]);
3989         aa     = abuf_r[k] + *(nextai[k]);
3990         nextaj = 0;
3991         for (j=0; nextaj<anzi; j++) {
3992           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
3993             ba_i[j] += aa[nextaj++];
3994           }
3995         }
3996         nextrow[k]++; nextai[k]++;
3997       }
3998     }
3999     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4000   }
4001   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4002   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4003 
4004   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4005   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4006   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4007   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4008   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4009   PetscFunctionReturn(0);
4010 }
4011 
4012 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4013 
4014 #undef __FUNCT__
4015 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4016 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4017 {
4018   PetscErrorCode      ierr;
4019   Mat                 B_mpi;
4020   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4021   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4022   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4023   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4024   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4025   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4026   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4027   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4028   MPI_Status          *status;
4029   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4030   PetscBT             lnkbt;
4031   Mat_Merge_SeqsToMPI *merge;
4032   PetscContainer      container;
4033 
4034   PetscFunctionBegin;
4035   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4036 
4037   /* make sure it is a PETSc comm */
4038   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4039   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4040   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4041 
4042   ierr = PetscNew(&merge);CHKERRQ(ierr);
4043   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4044 
4045   /* determine row ownership */
4046   /*---------------------------------------------------------*/
4047   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4048   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4049   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4050   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4051   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4052   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4053   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4054 
4055   m      = merge->rowmap->n;
4056   owners = merge->rowmap->range;
4057 
4058   /* determine the number of messages to send, their lengths */
4059   /*---------------------------------------------------------*/
4060   len_s = merge->len_s;
4061 
4062   len          = 0; /* length of buf_si[] */
4063   merge->nsend = 0;
4064   for (proc=0; proc<size; proc++) {
4065     len_si[proc] = 0;
4066     if (proc == rank) {
4067       len_s[proc] = 0;
4068     } else {
4069       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4070       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4071     }
4072     if (len_s[proc]) {
4073       merge->nsend++;
4074       nrows = 0;
4075       for (i=owners[proc]; i<owners[proc+1]; i++) {
4076         if (ai[i+1] > ai[i]) nrows++;
4077       }
4078       len_si[proc] = 2*(nrows+1);
4079       len         += len_si[proc];
4080     }
4081   }
4082 
4083   /* determine the number and length of messages to receive for ij-structure */
4084   /*-------------------------------------------------------------------------*/
4085   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4086   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4087 
4088   /* post the Irecv of j-structure */
4089   /*-------------------------------*/
4090   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4091   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4092 
4093   /* post the Isend of j-structure */
4094   /*--------------------------------*/
4095   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4096 
4097   for (proc=0, k=0; proc<size; proc++) {
4098     if (!len_s[proc]) continue;
4099     i    = owners[proc];
4100     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4101     k++;
4102   }
4103 
4104   /* receives and sends of j-structure are complete */
4105   /*------------------------------------------------*/
4106   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4107   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4108 
4109   /* send and recv i-structure */
4110   /*---------------------------*/
4111   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4112   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4113 
4114   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4115   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4116   for (proc=0,k=0; proc<size; proc++) {
4117     if (!len_s[proc]) continue;
4118     /* form outgoing message for i-structure:
4119          buf_si[0]:                 nrows to be sent
4120                [1:nrows]:           row index (global)
4121                [nrows+1:2*nrows+1]: i-structure index
4122     */
4123     /*-------------------------------------------*/
4124     nrows       = len_si[proc]/2 - 1;
4125     buf_si_i    = buf_si + nrows+1;
4126     buf_si[0]   = nrows;
4127     buf_si_i[0] = 0;
4128     nrows       = 0;
4129     for (i=owners[proc]; i<owners[proc+1]; i++) {
4130       anzi = ai[i+1] - ai[i];
4131       if (anzi) {
4132         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4133         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4134         nrows++;
4135       }
4136     }
4137     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4138     k++;
4139     buf_si += len_si[proc];
4140   }
4141 
4142   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4143   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4144 
4145   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4146   for (i=0; i<merge->nrecv; i++) {
4147     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4148   }
4149 
4150   ierr = PetscFree(len_si);CHKERRQ(ierr);
4151   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4152   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4153   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4154   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4155   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4156   ierr = PetscFree(status);CHKERRQ(ierr);
4157 
4158   /* compute a local seq matrix in each processor */
4159   /*----------------------------------------------*/
4160   /* allocate bi array and free space for accumulating nonzero column info */
4161   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4162   bi[0] = 0;
4163 
4164   /* create and initialize a linked list */
4165   nlnk = N+1;
4166   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4167 
4168   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4169   len  = ai[owners[rank+1]] - ai[owners[rank]];
4170   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4171 
4172   current_space = free_space;
4173 
4174   /* determine symbolic info for each local row */
4175   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4176 
4177   for (k=0; k<merge->nrecv; k++) {
4178     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4179     nrows       = *buf_ri_k[k];
4180     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4181     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4182   }
4183 
4184   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4185   len  = 0;
4186   for (i=0; i<m; i++) {
4187     bnzi = 0;
4188     /* add local non-zero cols of this proc's seqmat into lnk */
4189     arow  = owners[rank] + i;
4190     anzi  = ai[arow+1] - ai[arow];
4191     aj    = a->j + ai[arow];
4192     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4193     bnzi += nlnk;
4194     /* add received col data into lnk */
4195     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4196       if (i == *nextrow[k]) { /* i-th row */
4197         anzi  = *(nextai[k]+1) - *nextai[k];
4198         aj    = buf_rj[k] + *nextai[k];
4199         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4200         bnzi += nlnk;
4201         nextrow[k]++; nextai[k]++;
4202       }
4203     }
4204     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4205 
4206     /* if free space is not available, make more free space */
4207     if (current_space->local_remaining<bnzi) {
4208       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4209       nspacedouble++;
4210     }
4211     /* copy data into free space, then initialize lnk */
4212     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4213     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4214 
4215     current_space->array           += bnzi;
4216     current_space->local_used      += bnzi;
4217     current_space->local_remaining -= bnzi;
4218 
4219     bi[i+1] = bi[i] + bnzi;
4220   }
4221 
4222   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4223 
4224   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4225   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4226   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4227 
4228   /* create symbolic parallel matrix B_mpi */
4229   /*---------------------------------------*/
4230   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4231   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4232   if (n==PETSC_DECIDE) {
4233     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4234   } else {
4235     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4236   }
4237   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4238   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4239   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4240   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4241   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4242 
4243   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4244   B_mpi->assembled    = PETSC_FALSE;
4245   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4246   merge->bi           = bi;
4247   merge->bj           = bj;
4248   merge->buf_ri       = buf_ri;
4249   merge->buf_rj       = buf_rj;
4250   merge->coi          = NULL;
4251   merge->coj          = NULL;
4252   merge->owners_co    = NULL;
4253 
4254   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4255 
4256   /* attach the supporting struct to B_mpi for reuse */
4257   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4258   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4259   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4260   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4261   *mpimat = B_mpi;
4262 
4263   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4264   PetscFunctionReturn(0);
4265 }
4266 
4267 #undef __FUNCT__
4268 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4269 /*@C
4270       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential
4271                  matrices supplied by each processor
4272 
4273     Collective on MPI_Comm
4274 
4275    Input Parameters:
4276 +    comm - the communicator the parallel matrix will live on
4277 .    seqmat - the input sequential matrix
4278 .    m - number of local rows (or PETSC_DECIDE)
4279 .    n - number of local columns (or PETSC_DECIDE)
4280 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4281 
4282    Output Parameter:
4283 .    mpimat - the parallel matrix generated
4284 
4285     Level: advanced
4286 
4287    Notes:
4288      The dimensions of the sequential matrix in each processor MUST be the same.
4289      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4290      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4291 @*/
4292 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4293 {
4294   PetscErrorCode ierr;
4295   PetscMPIInt    size;
4296 
4297   PetscFunctionBegin;
4298   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4299   if (size == 1) {
4300     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4301     if (scall == MAT_INITIAL_MATRIX) {
4302       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4303     } else {
4304       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4305     }
4306     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4307     PetscFunctionReturn(0);
4308   }
4309   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4310   if (scall == MAT_INITIAL_MATRIX) {
4311     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4312   }
4313   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4314   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4315   PetscFunctionReturn(0);
4316 }
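
/*
   A minimal usage sketch for MatCreateMPIAIJSumSeqAIJ() (illustration only; the 4x4 size, the
   single entry, and the names seqA and C are hypothetical). Every rank assembles a SeqAIJ of the
   same global dimensions, then the per-rank matrices are summed into one parallel MPIAIJ matrix:

     Mat         seqA,C;
     PetscInt    row = 0,col = 0;
     PetscScalar v   = 1.0;

     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,4,4,1,NULL,&seqA);CHKERRQ(ierr);
     ierr = MatSetValues(seqA,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(seqA,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(seqA,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/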
4317 
4318 #undef __FUNCT__
4319 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4320 /*@
4321      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4322           mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
4323           with MatGetSize()
4324 
4325     Not Collective
4326 
4327    Input Parameters:
4328 +    A - the matrix
4329 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4330 
4331    Output Parameter:
4332 .    A_loc - the local sequential matrix generated
4333 
4334     Level: developer
4335 
4336 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4337 
4338 @*/
4339 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4340 {
4341   PetscErrorCode ierr;
4342   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4343   Mat_SeqAIJ     *mat,*a,*b;
4344   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4345   MatScalar      *aa,*ba,*cam;
4346   PetscScalar    *ca;
4347   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4348   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4349   PetscBool      match;
4350   MPI_Comm       comm;
4351   PetscMPIInt    size;
4352 
4353   PetscFunctionBegin;
4354   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4355   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4356   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4357   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4358   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4359 
4360   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4361   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4362   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4363   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4364   aa = a->a; ba = b->a;
4365   if (scall == MAT_INITIAL_MATRIX) {
4366     if (size == 1) {
4367       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
           ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4368       PetscFunctionReturn(0);
4369     }
4370 
4371     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4372     ci[0] = 0;
4373     for (i=0; i<am; i++) {
4374       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4375     }
4376     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4377     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4378     k    = 0;
4379     for (i=0; i<am; i++) {
4380       ncols_o = bi[i+1] - bi[i];
4381       ncols_d = ai[i+1] - ai[i];
4382       /* off-diagonal portion of A */
4383       for (jo=0; jo<ncols_o; jo++) {
4384         col = cmap[*bj];
4385         if (col >= cstart) break;
4386         cj[k]   = col; bj++;
4387         ca[k++] = *ba++;
4388       }
4389       /* diagonal portion of A */
4390       for (j=0; j<ncols_d; j++) {
4391         cj[k]   = cstart + *aj++;
4392         ca[k++] = *aa++;
4393       }
4394       /* off-diagonal portion of A */
4395       for (j=jo; j<ncols_o; j++) {
4396         cj[k]   = cmap[*bj++];
4397         ca[k++] = *ba++;
4398       }
4399     }
4400     /* put together the new matrix */
4401     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4402     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4403     /* Since these are PETSc arrays, change flags to free them as necessary. */
4404     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4405     mat->free_a  = PETSC_TRUE;
4406     mat->free_ij = PETSC_TRUE;
4407     mat->nonew   = 0;
4408   } else if (scall == MAT_REUSE_MATRIX) {
4409     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4410     ci = mat->i; cj = mat->j; cam = mat->a;
4411     for (i=0; i<am; i++) {
4412       /* off-diagonal portion of A */
4413       ncols_o = bi[i+1] - bi[i];
4414       for (jo=0; jo<ncols_o; jo++) {
4415         col = cmap[*bj];
4416         if (col >= cstart) break;
4417         *cam++ = *ba++; bj++;
4418       }
4419       /* diagonal portion of A */
4420       ncols_d = ai[i+1] - ai[i];
4421       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4422       /* off-diagonal portion of A */
4423       for (j=jo; j<ncols_o; j++) {
4424         *cam++ = *ba++; bj++;
4425       }
4426     }
4427   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4428   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4429   PetscFunctionReturn(0);
4430 }
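
/*
   A minimal usage sketch for MatMPIAIJGetLocalMat() (A is assumed to be an already assembled
   MPIAIJ matrix; A_loc is a hypothetical name). The first call builds the local matrix, a later
   call with MAT_REUSE_MATRIX only refreshes its numerical values:

     Mat A_loc;

     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/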
4431 
4432 #undef __FUNCT__
4433 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4434 /*@C
4435      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4436 
4437     Not Collective
4438 
4439    Input Parameters:
4440 +    A - the matrix
4441 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4442 -    row, col - index sets of rows and columns to extract (or NULL)
4443 
4444    Output Parameter:
4445 .    A_loc - the local sequential matrix generated
4446 
4447     Level: developer
4448 
4449 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4450 
4451 @*/
4452 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4453 {
4454   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4455   PetscErrorCode ierr;
4456   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4457   IS             isrowa,iscola;
4458   Mat            *aloc;
4459   PetscBool      match;
4460 
4461   PetscFunctionBegin;
4462   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4463   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4464   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4465   if (!row) {
4466     start = A->rmap->rstart; end = A->rmap->rend;
4467     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4468   } else {
4469     isrowa = *row;
4470   }
4471   if (!col) {
4472     start = A->cmap->rstart;
4473     cmap  = a->garray;
4474     nzA   = a->A->cmap->n;
4475     nzB   = a->B->cmap->n;
4476     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4477     ncols = 0;
4478     for (i=0; i<nzB; i++) {
4479       if (cmap[i] < start) idx[ncols++] = cmap[i];
4480       else break;
4481     }
4482     imark = i;
4483     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4484     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4485     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4486   } else {
4487     iscola = *col;
4488   }
4489   if (scall != MAT_INITIAL_MATRIX) {
4490     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4491     aloc[0] = *A_loc;
4492   }
4493   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4494   *A_loc = aloc[0];
4495   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4496   if (!row) {
4497     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4498   }
4499   if (!col) {
4500     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4501   }
4502   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4503   PetscFunctionReturn(0);
4504 }
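
/*
   A minimal usage sketch for MatMPIAIJGetLocalMatCondensed() (A is assumed to be an already
   assembled MPIAIJ matrix; A_cond is a hypothetical name). Passing NULL for row and col selects
   all local rows and all columns that have local nonzeros:

     Mat A_cond;

     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_cond);CHKERRQ(ierr);
     ierr = MatDestroy(&A_cond);CHKERRQ(ierr);
*/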
4505 
4506 #undef __FUNCT__
4507 #define __FUNCT__ "MatGetBrowsOfAcols"
4508 /*@C
4509     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A
4510 
4511     Collective on Mat
4512 
4513    Input Parameters:
4514 +    A,B - the matrices in mpiaij format
4515 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4516 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4517 
4518    Output Parameters:
4519 +    rowb, colb - index sets of rows and columns of B to extract
4520 -    B_seq - the sequential matrix generated
4521 
4522     Level: developer
4523 
4524 @*/
4525 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4526 {
4527   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4528   PetscErrorCode ierr;
4529   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4530   IS             isrowb,iscolb;
4531   Mat            *bseq=NULL;
4532 
4533   PetscFunctionBegin;
4534   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4535     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4536   }
4537   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4538 
4539   if (scall == MAT_INITIAL_MATRIX) {
4540     start = A->cmap->rstart;
4541     cmap  = a->garray;
4542     nzA   = a->A->cmap->n;
4543     nzB   = a->B->cmap->n;
4544     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4545     ncols = 0;
4546     for (i=0; i<nzB; i++) {  /* row < local row index */
4547       if (cmap[i] < start) idx[ncols++] = cmap[i];
4548       else break;
4549     }
4550     imark = i;
4551     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4552     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4553     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4554     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4555   } else {
4556     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4557     isrowb  = *rowb; iscolb = *colb;
4558     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4559     bseq[0] = *B_seq;
4560   }
4561   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4562   *B_seq = bseq[0];
4563   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4564   if (!rowb) {
4565     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4566   } else {
4567     *rowb = isrowb;
4568   }
4569   if (!colb) {
4570     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4571   } else {
4572     *colb = iscolb;
4573   }
4574   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4575   PetscFunctionReturn(0);
4576 }
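
/*
   A minimal usage sketch for MatGetBrowsOfAcols() (A and B are assumed to be MPIAIJ matrices with
   compatible layouts, i.e. A's column layout matches B's row layout). The index sets created on
   the first call are reused on later calls and, like B_seq, are owned by the caller:

     IS  rowb = NULL,colb = NULL;
     Mat B_seq;

     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/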
4577 
4578 #undef __FUNCT__
4579 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4580 /*
4581     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4582     of the OFF-DIAGONAL portion of the local A
4583 
4584     Collective on Mat
4585 
4586    Input Parameters:
4587 +    A,B - the matrices in mpiaij format
4588 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4589 
4590    Output Parameters:
4591 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4592 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4593 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4594 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4595 
4596     Level: developer
4597 
4598 */
4599 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4600 {
4601   VecScatter_MPI_General *gen_to,*gen_from;
4602   PetscErrorCode         ierr;
4603   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4604   Mat_SeqAIJ             *b_oth;
4605   VecScatter             ctx =a->Mvctx;
4606   MPI_Comm               comm;
4607   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4608   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4609   PetscScalar            *rvalues,*svalues;
4610   MatScalar              *b_otha,*bufa,*bufA;
4611   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4612   MPI_Request            *rwaits = NULL,*swaits = NULL;
4613   MPI_Status             *sstatus,rstatus;
4614   PetscMPIInt            jj,size;
4615   PetscInt               *cols,sbs,rbs;
4616   PetscScalar            *vals;
4617 
4618   PetscFunctionBegin;
4619   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4620   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4621 
4622   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4623     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4624   }
4625   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4626   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4627 
4628   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4629   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4630   rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
4631   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
4632   nrecvs   = gen_from->n;
4633   nsends   = gen_to->n;
4634 
4635   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4636   srow    = gen_to->indices;    /* local row index to be sent */
4637   sstarts = gen_to->starts;
4638   sprocs  = gen_to->procs;
4639   sstatus = gen_to->sstatus;
4640   sbs     = gen_to->bs;
4641   rstarts = gen_from->starts;
4642   rprocs  = gen_from->procs;
4643   rbs     = gen_from->bs;
4644 
4645   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4646   if (scall == MAT_INITIAL_MATRIX) {
4647     /* i-array */
4648     /*---------*/
4649     /*  post receives */
4650     for (i=0; i<nrecvs; i++) {
4651       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4652       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4653       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4654     }
4655 
4656     /* pack the outgoing message */
4657     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4658 
4659     sstartsj[0] = 0;
4660     rstartsj[0] = 0;
4661     len         = 0; /* total length of j or a array to be sent */
4662     k           = 0;
4663     for (i=0; i<nsends; i++) {
4664       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4665       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4666       for (j=0; j<nrows; j++) {
4667         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4668         for (l=0; l<sbs; l++) {
4669           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4670 
4671           rowlen[j*sbs+l] = ncols;
4672 
4673           len += ncols;
4674           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4675         }
4676         k++;
4677       }
4678       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4679 
4680       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4681     }
4682     /* recvs and sends of i-array are completed */
4683     i = nrecvs;
4684     while (i--) {
4685       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4686     }
4687     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4688 
4689     /* allocate buffers for sending j and a arrays */
4690     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4691     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4692 
4693     /* create i-array of B_oth */
4694     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4695 
4696     b_othi[0] = 0;
4697     len       = 0; /* total length of j or a array to be received */
4698     k         = 0;
4699     for (i=0; i<nrecvs; i++) {
4700       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4701       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4702       for (j=0; j<nrows; j++) {
4703         b_othi[k+1] = b_othi[k] + rowlen[j];
4704         len        += rowlen[j]; k++;
4705       }
4706       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4707     }
4708 
4709     /* allocate space for the j and a arrays of B_oth */
4710     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4711     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4712 
4713     /* j-array */
4714     /*---------*/
4715     /*  post receives of j-array */
4716     for (i=0; i<nrecvs; i++) {
4717       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4718       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4719     }
4720 
4721     /* pack the outgoing message j-array */
4722     k = 0;
4723     for (i=0; i<nsends; i++) {
4724       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4725       bufJ  = bufj+sstartsj[i];
4726       for (j=0; j<nrows; j++) {
4727         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4728         for (ll=0; ll<sbs; ll++) {
4729           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4730           for (l=0; l<ncols; l++) {
4731             *bufJ++ = cols[l];
4732           }
4733           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4734         }
4735       }
4736       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4737     }
4738 
4739     /* recvs and sends of j-array are completed */
4740     i = nrecvs;
4741     while (i--) {
4742       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4743     }
4744     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4745   } else if (scall == MAT_REUSE_MATRIX) {
4746     sstartsj = *startsj_s;
4747     rstartsj = *startsj_r;
4748     bufa     = *bufa_ptr;
4749     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4750     b_otha   = b_oth->a;
4751   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Unsupported MatReuse value");
4752 
4753   /* a-array */
4754   /*---------*/
4755   /*  post receives of a-array */
4756   for (i=0; i<nrecvs; i++) {
4757     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4758     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4759   }
4760 
4761   /* pack the outgoing message a-array */
4762   k = 0;
4763   for (i=0; i<nsends; i++) {
4764     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4765     bufA  = bufa+sstartsj[i];
4766     for (j=0; j<nrows; j++) {
4767       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4768       for (ll=0; ll<sbs; ll++) {
4769         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4770         for (l=0; l<ncols; l++) {
4771           *bufA++ = vals[l];
4772         }
4773         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4774       }
4775     }
4776     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4777   }
4778   /* recvs and sends of a-array are completed */
4779   i = nrecvs;
4780   while (i--) {
4781     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4782   }
4783   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4784   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4785 
4786   if (scall == MAT_INITIAL_MATRIX) {
4787     /* put together the new matrix */
4788     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4789 
4790     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4791     /* Since these are PETSc arrays, change flags to free them as necessary. */
4792     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4793     b_oth->free_a  = PETSC_TRUE;
4794     b_oth->free_ij = PETSC_TRUE;
4795     b_oth->nonew   = 0;
4796 
4797     ierr = PetscFree(bufj);CHKERRQ(ierr);
4798     if (!startsj_s || !bufa_ptr) {
4799       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4800       ierr = PetscFree(bufa);CHKERRQ(ierr);
4801     } else {
4802       *startsj_s = sstartsj;
4803       *startsj_r = rstartsj;
4804       *bufa_ptr  = bufa;
4805     }
4806   }
4807   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4808   PetscFunctionReturn(0);
4809 }
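
/*
   A minimal calling sketch for the internal routine MatGetBrowsOfAoCols_MPIAIJ() (A and B are
   assumed to be MPIAIJ matrices with compatible layouts). The startsj/bufa buffers produced by the
   first call are handed back on MAT_REUSE_MATRIX calls and must eventually be freed by the caller,
   together with B_oth:

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa      = NULL;
     Mat       B_oth;

     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
*/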
4810 
4811 #undef __FUNCT__
4812 #define __FUNCT__ "MatGetCommunicationStructs"
4813 /*@C
4814   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4815 
4816   Not Collective
4817 
4818   Input Parameter:
4819 . A - The matrix in mpiaij format
4820 
4821   Output Parameters:
4822 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4823 . colmap - A map from global column index to local index into lvec
4824 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4825 
4826   Level: developer
4827 
4828 @*/
4829 #if defined(PETSC_USE_CTABLE)
4830 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4831 #else
4832 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4833 #endif
4834 {
4835   Mat_MPIAIJ *a;
4836 
4837   PetscFunctionBegin;
4838   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4839   PetscValidPointer(lvec, 2);
4840   PetscValidPointer(colmap, 3);
4841   PetscValidPointer(multScatter, 4);
4842   a = (Mat_MPIAIJ*) A->data;
4843   if (lvec) *lvec = a->lvec;
4844   if (colmap) *colmap = a->colmap;
4845   if (multScatter) *multScatter = a->Mvctx;
4846   PetscFunctionReturn(0);
4847 }
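
/*
   A minimal usage sketch for MatGetCommunicationStructs() (A is assumed to be an assembled MPIAIJ
   matrix, and a build without PETSC_USE_CTABLE is assumed so colmap is a PetscInt array; with
   PETSC_USE_CTABLE the second output would instead be a PetscTable). The returned objects are
   owned by A and must not be destroyed by the caller:

     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/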
4848 
4849 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4850 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4851 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4852 #if defined(PETSC_HAVE_ELEMENTAL)
4853 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4854 #endif
4855 
4856 #undef __FUNCT__
4857 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4858 /*
4859     Computes (B'*A')' since computing B*A directly is untenable
4860 
4861                n                       p                          p
4862         (              )       (              )         (                  )
4863       m (      A       )  *  n (       B      )   =   m (         C        )
4864         (              )       (              )         (                  )
4865 
4866 */
4867 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4868 {
4869   PetscErrorCode ierr;
4870   Mat            At,Bt,Ct;
4871 
4872   PetscFunctionBegin;
4873   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4874   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4875   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4876   ierr = MatDestroy(&At);CHKERRQ(ierr);
4877   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4878   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4879   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4880   PetscFunctionReturn(0);
4881 }
4882 
4883 #undef __FUNCT__
4884 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4885 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4886 {
4887   PetscErrorCode ierr;
4888   PetscInt       m=A->rmap->n,n=B->cmap->n;
4889   Mat            Cmat;
4890 
4891   PetscFunctionBegin;
4892   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
4893   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
4894   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4895   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
4896   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4897   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
4898   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4899   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4900 
4901   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
4902 
4903   *C = Cmat;
4904   PetscFunctionReturn(0);
4905 }
4906 
4907 /* ----------------------------------------------------------------*/
4908 #undef __FUNCT__
4909 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4910 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4911 {
4912   PetscErrorCode ierr;
4913 
4914   PetscFunctionBegin;
4915   if (scall == MAT_INITIAL_MATRIX) {
4916     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4917     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4918     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
4919   }
4920   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4921   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4922   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
4923   PetscFunctionReturn(0);
4924 }
4925 
4926 /*MC
4927    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4928 
4929    Options Database Keys:
4930 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4931 
4932   Level: beginner
4933 
4934 .seealso: MatCreateAIJ()
4935 M*/
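
/*
   A minimal usage sketch for creating a matrix of this type (the global sizes M and N are
   hypothetical; -mat_type mpiaij from the options database has the same effect as the
   MatSetType() call when MatSetFromOptions() is used):

     Mat A;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/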
4936 
4937 #undef __FUNCT__
4938 #define __FUNCT__ "MatCreate_MPIAIJ"
4939 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
4940 {
4941   Mat_MPIAIJ     *b;
4942   PetscErrorCode ierr;
4943   PetscMPIInt    size;
4944 
4945   PetscFunctionBegin;
4946   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
4947 
4948   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
4949   B->data       = (void*)b;
4950   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4951   B->assembled  = PETSC_FALSE;
4952   B->insertmode = NOT_SET_VALUES;
4953   b->size       = size;
4954 
4955   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
4956 
4957   /* build cache for off array entries formed */
4958   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
4959 
4960   b->donotstash  = PETSC_FALSE;
4961   b->colmap      = 0;
4962   b->garray      = 0;
4963   b->roworiented = PETSC_TRUE;
4964 
4965   /* stuff used for matrix vector multiply */
4966   b->lvec  = NULL;
4967   b->Mvctx = NULL;
4968 
4969   /* stuff for MatGetRow() */
4970   b->rowindices   = 0;
4971   b->rowvalues    = 0;
4972   b->getrowactive = PETSC_FALSE;
4973 
4974   /* flexible pointer used in CUSP/CUSPARSE classes */
4975   b->spptr = NULL;
4976 
4977   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4978   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4979   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4980   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4981   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4982   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4983   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4984   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
4985   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
4986   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
4987 #if defined(PETSC_HAVE_ELEMENTAL)
4988   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
4989 #endif
4990   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
4991   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
4992   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
4993   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4994   PetscFunctionReturn(0);
4995 }
4996 
4997 #undef __FUNCT__
4998 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4999 /*@C
5000      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5001          and "off-diagonal" parts of the matrix in CSR format.
5002 
5003    Collective on MPI_Comm
5004 
5005    Input Parameters:
5006 +  comm - MPI communicator
5007 .  m - number of local rows (Cannot be PETSC_DECIDE)
5008 .  n - This value should be the same as the local size used in creating the
5009        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5010        it calculated if N is given). For square matrices n is almost always m.
5011 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5012 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5013 .   i - row indices for "diagonal" portion of matrix
5014 .   j - column indices for "diagonal" portion, in local numbering (0 to n-1)
5015 .   a - matrix values for "diagonal" portion
5016 .   oi - row indices for "off-diagonal" portion of matrix
5017 .   oj - column indices for "off-diagonal" portion, in global numbering (0 to N-1)
5018 -   oa - matrix values for "off-diagonal" portion
5019 
5020    Output Parameter:
5021 .   mat - the matrix
5022 
5023    Level: advanced
5024 
5025    Notes:
5026        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5027        must free the arrays once the matrix has been destroyed and not before.
5028 
5029        The i and j indices are 0 based
5030 
5031        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5032 
5033        This sets local rows and cannot be used to set off-processor values.
5034 
5035        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5036        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5037        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5038        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5039        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5040        communication if it is known that only local entries will be set.
5041 
5042 .keywords: matrix, aij, compressed row, sparse, parallel
5043 
5044 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5045           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5046 @*/
5047 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5048 {
5049   PetscErrorCode ierr;
5050   Mat_MPIAIJ     *maij;
5051 
5052   PetscFunctionBegin;
5053   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5054   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5055   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5056   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5057   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5058   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5059   maij = (Mat_MPIAIJ*) (*mat)->data;
5060 
5061   (*mat)->preallocated = PETSC_TRUE;
5062 
5063   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5064   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5065 
5066   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5067   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5068 
5069   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5070   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5071   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5072   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5073 
5074   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5075   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5076   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5077   PetscFunctionReturn(0);
5078 }
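
/*
   A minimal calling sketch for MatCreateMPIAIJWithSplitArrays() (the arrays i,j,a and oi,oj,oa are
   assumed to have been filled by the application in 0-based CSR form, with j holding local column
   indices of the diagonal block and oj holding global column indices of the off-diagonal block;
   mlocal and nlocal are hypothetical local sizes). The arrays are not copied and must remain valid
   until the matrix is destroyed:

     Mat A;

     ierr = MatCreateMPIAIJWithSplitArrays(comm,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/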
5079 
5080 /*
5081     Special version for direct calls from Fortran
5082 */
5083 #include <petsc/private/fortranimpl.h>
5084 
5085 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5086 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5087 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5088 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5089 #endif
5090 
5091 /* Change these macros so can be used in void function */
5092 #undef CHKERRQ
5093 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5094 #undef SETERRQ2
5095 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5096 #undef SETERRQ3
5097 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5098 #undef SETERRQ
5099 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5100 
5101 #undef __FUNCT__
5102 #define __FUNCT__ "matsetvaluesmpiaij_"
5103 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5104 {
5105   Mat            mat  = *mmat;
5106   PetscInt       m    = *mm, n = *mn;
5107   InsertMode     addv = *maddv;
5108   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5109   PetscScalar    value;
5110   PetscErrorCode ierr;
5111 
5112   MatCheckPreallocated(mat,1);
5113   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5114 
5115 #if defined(PETSC_USE_DEBUG)
5116   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5117 #endif
5118   {
5119     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5120     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5121     PetscBool roworiented = aij->roworiented;
5122 
5123     /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5124     Mat        A                 = aij->A;
5125     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5126     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5127     MatScalar  *aa               = a->a;
5128     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5129     Mat        B                 = aij->B;
5130     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5131     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5132     MatScalar  *ba               = b->a;
5133 
5134     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5135     PetscInt  nonew = a->nonew;
5136     MatScalar *ap1,*ap2;
5137 
5138     PetscFunctionBegin;
5139     for (i=0; i<m; i++) {
5140       if (im[i] < 0) continue;
5141 #if defined(PETSC_USE_DEBUG)
5142       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5143 #endif
5144       if (im[i] >= rstart && im[i] < rend) {
5145         row      = im[i] - rstart;
5146         lastcol1 = -1;
5147         rp1      = aj + ai[row];
5148         ap1      = aa + ai[row];
5149         rmax1    = aimax[row];
5150         nrow1    = ailen[row];
5151         low1     = 0;
5152         high1    = nrow1;
5153         lastcol2 = -1;
5154         rp2      = bj + bi[row];
5155         ap2      = ba + bi[row];
5156         rmax2    = bimax[row];
5157         nrow2    = bilen[row];
5158         low2     = 0;
5159         high2    = nrow2;
5160 
5161         for (j=0; j<n; j++) {
5162           if (roworiented) value = v[i*n+j];
5163           else value = v[i+j*m];
5164           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5165           if (in[j] >= cstart && in[j] < cend) {
5166             col = in[j] - cstart;
5167             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5168           } else if (in[j] < 0) continue;
5169 #if defined(PETSC_USE_DEBUG)
5170           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5171 #endif
5172           else {
5173             if (mat->was_assembled) {
5174               if (!aij->colmap) {
5175                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5176               }
5177 #if defined(PETSC_USE_CTABLE)
5178               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5179               col--;
5180 #else
5181               col = aij->colmap[in[j]] - 1;
5182 #endif
5183               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5184                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5185                 col  =  in[j];
5186                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5187                 B     = aij->B;
5188                 b     = (Mat_SeqAIJ*)B->data;
5189                 ba    = b->a; /* must be refreshed before ap2 is computed: MatDisAssemble_MPIAIJ() replaced B and freed the old value array */
5190                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5191                 rp2   = bj + bi[row];
5192                 ap2   = ba + bi[row];
5193                 rmax2 = bimax[row];
5194                 nrow2 = bilen[row];
5195                 low2  = 0;
5196                 high2 = nrow2;
5197                 bm    = aij->B->rmap->n;
5198               }
5199             } else col = in[j];
5200             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5201           }
5202         }
5203       } else if (!aij->donotstash) {
5204         if (roworiented) {
5205           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5206         } else {
5207           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5208         }
5209       }
5210     }
5211   }
5212   PetscFunctionReturnVoid();
5213 }
5214 
5215