xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision bbd98b0ddc3b728de2842e56499debb5c79c81f3)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10 
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise.  As a result, for single process communicators,
13    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
14    for communicators controlling multiple processes.  It is recommended that you call both of
15    the above preallocation routines for simplicity.
16 
17    Options Database Keys:
18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19 
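   Example Usage (a minimal sketch; m, n, d_nz, and o_nz are placeholders chosen by the caller, and error
   checking with CHKERRQ() is omitted):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m,n);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,d_nz,NULL);            // used when the communicator has one process
   MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);  // used when it has more than one
   // ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
.ve
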
20   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
21    enough of them exist.
22 
23   Level: beginner
24 
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
26 M*/
27 
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30 
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34   for communicators controlling multiple processes.  It is recommended that you call both of
35   the above preallocation routines for simplicity.
36 
37    Options Database Keys:
38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39 
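   Example Usage (a brief sketch; preallocation follows the same pattern as for MATAIJ):
.vb
   MatSetType(A,MATAIJCRL);
.ve
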
40   Level: beginner
41 
42 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
48 {
49   PetscErrorCode  ierr;
50   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
51   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
52   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
53   const PetscInt  *ia,*ib;
54   const MatScalar *aa,*bb;
55   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
56   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
57 
58   PetscFunctionBegin;
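  /* Two passes: first count the local rows whose diagonal (A) and off-diagonal (B) parts are empty or
     contain only explicit zeros; if no process finds such a row, *keptrows is left NULL and all rows are
     kept.  Otherwise a second pass records the global indices of the rows that do contain a nonzero. */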
59   *keptrows = 0;
60   ia        = a->i;
61   ib        = b->i;
62   for (i=0; i<m; i++) {
63     na = ia[i+1] - ia[i];
64     nb = ib[i+1] - ib[i];
65     if (!na && !nb) {
66       cnt++;
67       goto ok1;
68     }
69     aa = a->a + ia[i];
70     for (j=0; j<na; j++) {
71       if (aa[j] != 0.0) goto ok1;
72     }
73     bb = b->a + ib[i];
74     for (j=0; j <nb; j++) {
75       if (bb[j] != 0.0) goto ok1;
76     }
77     cnt++;
78 ok1:;
79   }
80   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
81   if (!n0rows) PetscFunctionReturn(0);
82   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
83   cnt  = 0;
84   for (i=0; i<m; i++) {
85     na = ia[i+1] - ia[i];
86     nb = ib[i+1] - ib[i];
87     if (!na && !nb) continue;
88     aa = a->a + ia[i];
89     for (j=0; j<na;j++) {
90       if (aa[j] != 0.0) {
91         rows[cnt++] = rstart + i;
92         goto ok2;
93       }
94     }
95     bb = b->a + ib[i];
96     for (j=0; j<nb; j++) {
97       if (bb[j] != 0.0) {
98         rows[cnt++] = rstart + i;
99         goto ok2;
100       }
101     }
102 ok2:;
103   }
104   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
110 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
111 {
112   PetscErrorCode    ierr;
113   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
114 
115   PetscFunctionBegin;
116   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
117     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
118   } else {
119     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
128 {
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
130   PetscErrorCode ierr;
131   PetscInt       i,rstart,nrows,*rows;
132 
133   PetscFunctionBegin;
134   *zrows = NULL;
135   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
136   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
137   for (i=0; i<nrows; i++) rows[i] += rstart;
138   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
145 {
146   PetscErrorCode ierr;
147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
148   PetscInt       i,n,*garray = aij->garray;
149   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
150   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
151   PetscReal      *work;
152 
153   PetscFunctionBegin;
154   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
155   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
156   if (type == NORM_2) {
157     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
158       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
159     }
160     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
161       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
162     }
163   } else if (type == NORM_1) {
164     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
165       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
166     }
167     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
168       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
169     }
170   } else if (type == NORM_INFINITY) {
171     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
172       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
173     }
174     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
175       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
176     }
177 
178   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
179   if (type == NORM_INFINITY) {
180     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
181   } else {
182     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
183   }
184   ierr = PetscFree(work);CHKERRQ(ierr);
185   if (type == NORM_2) {
186     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 #undef __FUNCT__
192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
194 {
195   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
196   IS              sis,gis;
197   PetscErrorCode  ierr;
198   const PetscInt  *isis,*igis;
199   PetscInt        n,*iis,nsis,ngis,rstart,i;
200 
201   PetscFunctionBegin;
202   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
203   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
204   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
205   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
206   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
207   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
208 
209   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
210   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
211   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
212   n    = ngis + nsis;
213   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
214   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
215   for (i=0; i<n; i++) iis[i] += rstart;
216   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
217 
218   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
219   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
220   ierr = ISDestroy(&sis);CHKERRQ(ierr);
221   ierr = ISDestroy(&gis);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
225 #undef __FUNCT__
226 #define __FUNCT__ "MatDistribute_MPIAIJ"
227 /*
228     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
229     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
230 
231     Only for square matrices
232 
233     Used by a preconditioner, hence PETSC_EXTERN
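
    A hypothetical call from a preconditioner might look like the sketch below (m is the number of rows
    this rank is to own; only the matrix entries of gmat on process 0 are read):

       MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);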
234 */
235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
236 {
237   PetscMPIInt    rank,size;
238   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
239   PetscErrorCode ierr;
240   Mat            mat;
241   Mat_SeqAIJ     *gmata;
242   PetscMPIInt    tag;
243   MPI_Status     status;
244   PetscBool      aij;
245   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
246 
247   PetscFunctionBegin;
248   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
249   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
250   if (!rank) {
251     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
252     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
253   }
254   if (reuse == MAT_INITIAL_MATRIX) {
255     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
256     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
257     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
258     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
259     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
260     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
261     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
262     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
263     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
264 
265     rowners[0] = 0;
266     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
267     rstart = rowners[rank];
268     rend   = rowners[rank+1];
269     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
270     if (!rank) {
271       gmata = (Mat_SeqAIJ*) gmat->data;
272       /* send row lengths to all processors */
273       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
274       for (i=1; i<size; i++) {
275         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
276       }
277       /* determine the diagonal and off-diagonal nonzero counts */
278       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
279       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
280       jj   = 0;
281       for (i=0; i<m; i++) {
282         for (j=0; j<dlens[i]; j++) {
283           if (gmata->j[jj] < rstart) ld[i]++;
284           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
285           jj++;
286         }
287       }
288       /* send column indices to other processes */
289       for (i=1; i<size; i++) {
290         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
291         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
292         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293       }
294 
295       /* send numerical values to other processes */
296       for (i=1; i<size; i++) {
297         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
298         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
299       }
300       gmataa = gmata->a;
301       gmataj = gmata->j;
302 
303     } else {
304       /* receive row lengths */
305       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
306       /* receive column indices */
307       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
308       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
309       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       /* determine the diagonal and off-diagonal nonzero counts */
311       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
312       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
313       jj   = 0;
314       for (i=0; i<m; i++) {
315         for (j=0; j<dlens[i]; j++) {
316           if (gmataj[jj] < rstart) ld[i]++;
317           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
318           jj++;
319         }
320       }
321       /* receive numerical values */
322       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* set preallocation */
326     for (i=0; i<m; i++) {
327       dlens[i] -= olens[i];
328     }
329     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
330     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
331 
332     for (i=0; i<m; i++) {
333       dlens[i] += olens[i];
334     }
335     cnt = 0;
336     for (i=0; i<m; i++) {
337       row  = rstart + i;
338       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
339       cnt += dlens[i];
340     }
341     if (rank) {
342       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
343     }
344     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
345     ierr = PetscFree(rowners);CHKERRQ(ierr);
346 
347     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
348 
349     *inmat = mat;
350   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
351     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
352     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
353     mat  = *inmat;
354     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
355     if (!rank) {
356       /* send numerical values to other processes */
357       gmata  = (Mat_SeqAIJ*) gmat->data;
358       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
359       gmataa = gmata->a;
360       for (i=1; i<size; i++) {
361         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
362         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
363       }
364       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
365     } else {
366       /* receive numerical values from process 0*/
367       nz   = Ad->nz + Ao->nz;
368       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
369       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
370     }
371     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
372     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
373     ad = Ad->a;
374     ao = Ao->a;
375     if (mat->rmap->n) {
376       i  = 0;
377       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
378       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
379     }
380     for (i=1; i<mat->rmap->n; i++) {
381       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
382       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
383     }
384     i--;
385     if (mat->rmap->n) {
386       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
387     }
388     if (rank) {
389       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
390     }
391   }
392   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
393   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 /*
398   Local utility routine that creates a mapping from the global column
399 number to the local number in the off-diagonal part of the local
400 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
401 a slightly higher hash table cost; without it it is not scalable (each processor
402 has an order N integer array) but is fast to access.
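
  For example (a sketch): if the off-diagonal block references global columns garray = {3,9,14}, the map
records 3 -> 1, 9 -> 2, 14 -> 3 (the local index plus one, so that zero can mean "column not present");
callers such as MatSetValues_MPIAIJ() recover the local index with colmap[gcol]-1 (gcol denoting the
global column index), or, when PETSC_USE_CTABLE is defined, with PetscTableFind(colmap,gcol+1,&col)
followed by col--.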
403 */
404 #undef __FUNCT__
405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
407 {
408   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscInt       n = aij->B->cmap->n,i;
411 
412   PetscFunctionBegin;
413   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
414 #if defined(PETSC_USE_CTABLE)
415   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
416   for (i=0; i<n; i++) {
417     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
418   }
419 #else
420   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
421   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
422   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
423 #endif
424   PetscFunctionReturn(0);
425 }
426 
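/*
   MatSetValues_SeqAIJ_A_Private() inserts (or, with ADD_VALUES, adds to) the entry at (row,col) of the
   local diagonal block A: a short binary search narrows [low1,high1) within the row, a linear scan then
   locates the column, and if the column is absent and new nonzeros are permitted the later entries of the
   row are shifted up one slot (reallocating via MatSeqXAIJReallocateAIJ() when the row is full).
   MatSetValues_SeqAIJ_B_Private() below does the same for the off-diagonal block B.
*/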
427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
428 { \
429     if (col <= lastcol1)  low1 = 0;     \
430     else                 high1 = nrow1; \
431     lastcol1 = col;\
432     while (high1-low1 > 5) { \
433       t = (low1+high1)/2; \
434       if (rp1[t] > col) high1 = t; \
435       else              low1  = t; \
436     } \
437       for (_i=low1; _i<high1; _i++) { \
438         if (rp1[_i] > col) break; \
439         if (rp1[_i] == col) { \
440           if (addv == ADD_VALUES) ap1[_i] += value;   \
441           else                    ap1[_i] = value; \
442           goto a_noinsert; \
443         } \
444       }  \
445       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
446       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
447       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
448       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
449       N = nrow1++ - 1; a->nz++; high1++; \
450       /* shift up all the later entries in this row */ \
451       for (ii=N; ii>=_i; ii--) { \
452         rp1[ii+1] = rp1[ii]; \
453         ap1[ii+1] = ap1[ii]; \
454       } \
455       rp1[_i] = col;  \
456       ap1[_i] = value;  \
457       A->nonzerostate++;\
458       a_noinsert: ; \
459       ailen[row] = nrow1; \
460 }
461 
462 
463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
464   { \
465     if (col <= lastcol2) low2 = 0;                        \
466     else high2 = nrow2;                                   \
467     lastcol2 = col;                                       \
468     while (high2-low2 > 5) {                              \
469       t = (low2+high2)/2;                                 \
470       if (rp2[t] > col) high2 = t;                        \
471       else             low2  = t;                         \
472     }                                                     \
473     for (_i=low2; _i<high2; _i++) {                       \
474       if (rp2[_i] > col) break;                           \
475       if (rp2[_i] == col) {                               \
476         if (addv == ADD_VALUES) ap2[_i] += value;         \
477         else                    ap2[_i] = value;          \
478         goto b_noinsert;                                  \
479       }                                                   \
480     }                                                     \
481     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
482     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
483     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
484     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
485     N = nrow2++ - 1; b->nz++; high2++;                    \
486     /* shift up all the later entries in this row */      \
487     for (ii=N; ii>=_i; ii--) {                            \
488       rp2[ii+1] = rp2[ii];                                \
489       ap2[ii+1] = ap2[ii];                                \
490     }                                                     \
491     rp2[_i] = col;                                        \
492     ap2[_i] = value;                                      \
493     B->nonzerostate++;                                    \
494     b_noinsert: ;                                         \
495     bilen[row] = nrow2;                                   \
496   }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
501 {
502   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
503   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
504   PetscErrorCode ierr;
505   PetscInt       l,*garray = mat->garray,diag;
506 
507   PetscFunctionBegin;
508   /* code only works for square matrices A */
509 
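  /* v is assumed to hold the entire local row ordered by global column index: first the entries to the
     left of the diagonal block, then the diagonal-block entries, then the entries to its right */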
510   /* find size of row to the left of the diagonal part */
511   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
512   row  = row - diag;
513   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
514     if (garray[b->j[b->i[row]+l]] > diag) break;
515   }
516   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* diagonal part */
519   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
520 
521   /* right of diagonal part */
522   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatSetValues_MPIAIJ"
528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
529 {
530   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
531   PetscScalar    value;
532   PetscErrorCode ierr;
533   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
534   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
535   PetscBool      roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat        A                 = aij->A;
539   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
540   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
541   MatScalar  *aa               = a->a;
542   PetscBool  ignorezeroentries = a->ignorezeroentries;
543   Mat        B                 = aij->B;
544   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
545   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
546   MatScalar  *ba               = b->a;
547 
548   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
549   PetscInt  nonew;
550   MatScalar *ap1,*ap2;
551 
552   PetscFunctionBegin;
553   for (i=0; i<m; i++) {
554     if (im[i] < 0) continue;
555 #if defined(PETSC_USE_DEBUG)
556     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
557 #endif
558     if (im[i] >= rstart && im[i] < rend) {
559       row      = im[i] - rstart;
560       lastcol1 = -1;
561       rp1      = aj + ai[row];
562       ap1      = aa + ai[row];
563       rmax1    = aimax[row];
564       nrow1    = ailen[row];
565       low1     = 0;
566       high1    = nrow1;
567       lastcol2 = -1;
568       rp2      = bj + bi[row];
569       ap2      = ba + bi[row];
570       rmax2    = bimax[row];
571       nrow2    = bilen[row];
572       low2     = 0;
573       high2    = nrow2;
574 
575       for (j=0; j<n; j++) {
576         if (roworiented) value = v[i*n+j];
577         else             value = v[i+j*m];
578         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
579         if (in[j] >= cstart && in[j] < cend) {
580           col   = in[j] - cstart;
581           nonew = a->nonew;
582           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
583         } else if (in[j] < 0) continue;
584 #if defined(PETSC_USE_DEBUG)
585         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
586 #endif
587         else {
588           if (mat->was_assembled) {
589             if (!aij->colmap) {
590               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
591             }
592 #if defined(PETSC_USE_CTABLE)
593             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
594             col--;
595 #else
596             col = aij->colmap[in[j]] - 1;
597 #endif
598             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
599               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
600               col  =  in[j];
601               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
602               B     = aij->B;
603               b     = (Mat_SeqAIJ*)B->data;
604               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
605               rp2   = bj + bi[row];
606               ap2   = ba + bi[row];
607               rmax2 = bimax[row];
608               nrow2 = bilen[row];
609               low2  = 0;
610               high2 = nrow2;
611               bm    = aij->B->rmap->n;
612               ba    = b->a;
613             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
614           } else col = in[j];
615           nonew = b->nonew;
616           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
617         }
618       }
619     } else {
620       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
621       if (!aij->donotstash) {
622         mat->assembled = PETSC_FALSE;
623         if (roworiented) {
624           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
625         } else {
626           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
627         }
628       }
629     }
630   }
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatGetValues_MPIAIJ"
636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
641   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
642 
643   PetscFunctionBegin;
644   for (i=0; i<m; i++) {
645     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
646     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
647     if (idxm[i] >= rstart && idxm[i] < rend) {
648       row = idxm[i] - rstart;
649       for (j=0; j<n; j++) {
650         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
651         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
652         if (idxn[j] >= cstart && idxn[j] < cend) {
653           col  = idxn[j] - cstart;
654           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
655         } else {
656           if (!aij->colmap) {
657             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
658           }
659 #if defined(PETSC_USE_CTABLE)
660           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
661           col--;
662 #else
663           col = aij->colmap[idxn[j]] - 1;
664 #endif
665           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
666           else {
667             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
668           }
669         }
670       }
671     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
681 {
682   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
683   PetscErrorCode ierr;
684   PetscInt       nstash,reallocs;
685 
686   PetscFunctionBegin;
687   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
688 
689   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
690   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
691   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
694 
695 #undef __FUNCT__
696 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
697 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
698 {
699   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
700   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
701   PetscErrorCode ierr;
702   PetscMPIInt    n;
703   PetscInt       i,j,rstart,ncols,flg;
704   PetscInt       *row,*col;
705   PetscBool      other_disassembled;
706   PetscScalar    *val;
707 
708   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
709 
710   PetscFunctionBegin;
711   if (!aij->donotstash && !mat->nooffprocentries) {
712     while (1) {
713       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
714       if (!flg) break;
715 
716       for (i=0; i<n; ) {
717         /* Now identify the consecutive vals belonging to the same row */
718         for (j=i,rstart=row[j]; j<n; j++) {
719           if (row[j] != rstart) break;
720         }
721         if (j < n) ncols = j-i;
722         else       ncols = n-i;
723         /* Now assemble all these values with a single function call */
724         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
725 
726         i = j;
727       }
728     }
729     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
730   }
731   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
732   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
733 
734   /* determine if any processor has disassembled; if so we must
735      also disassemble ourselves, in order that we may reassemble. */
736   /*
737      if nonzero structure of submatrix B cannot change then we know that
738      no processor disassembled thus we can skip this stuff
739   */
740   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
741     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
742     if (mat->was_assembled && !other_disassembled) {
743       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
744     }
745   }
746   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
747     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
748   }
749   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
750   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
751   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
752 
753   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
754 
755   aij->rowvalues = 0;
756 
757   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
758   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
759 
760   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
761   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
762     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
763     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
764   }
765   PetscFunctionReturn(0);
766 }
767 
768 #undef __FUNCT__
769 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
771 {
772   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
773   PetscErrorCode ierr;
774 
775   PetscFunctionBegin;
776   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
777   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 
781 #undef __FUNCT__
782 #define __FUNCT__ "MatZeroRows_MPIAIJ"
783 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
784 {
785   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
786   PetscInt      *owners = A->rmap->range;
787   PetscInt       n      = A->rmap->n;
788   PetscSF        sf;
789   PetscInt      *lrows;
790   PetscSFNode   *rrows;
791   PetscInt       r, p = 0, len = 0;
792   PetscErrorCode ierr;
793 
794   PetscFunctionBegin;
795   /* Create SF where leaves are input rows and roots are owned rows */
796   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
797   for (r = 0; r < n; ++r) lrows[r] = -1;
798   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
799   for (r = 0; r < N; ++r) {
800     const PetscInt idx   = rows[r];
801     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
802     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
803       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
804     }
805     if (A->nooffproczerorows) {
806       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
807       lrows[len++] = idx - owners[p];
808     } else {
809       rrows[r].rank = p;
810       rrows[r].index = rows[r] - owners[p];
811     }
812   }
813   if (!A->nooffproczerorows) {
814     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
815     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
816     /* Collect flags for rows to be zeroed */
817     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
818     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
819     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
820     /* Compress and put in row numbers */
821     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
822   }
823   /* fix right hand side if needed */
824   if (x && b) {
825     const PetscScalar *xx;
826     PetscScalar       *bb;
827 
828     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
829     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
830     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
831     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
832     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
833   }
834   /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
835   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
836   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
837     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
838   } else if (diag != 0.0) {
839     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
840     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
841     for (r = 0; r < len; ++r) {
842       const PetscInt row = lrows[r] + A->rmap->rstart;
843       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
844     }
845     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
846     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
847   } else {
848     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
849   }
850   ierr = PetscFree(lrows);CHKERRQ(ierr);
851 
852   /* only change matrix nonzero state if pattern was allowed to be changed */
853   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
854     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
855     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
856   }
857   PetscFunctionReturn(0);
858 }
859 
860 #undef __FUNCT__
861 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
862 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
863 {
864   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
865   PetscErrorCode    ierr;
866   PetscMPIInt       n = A->rmap->n;
867   PetscInt          i,j,r,m,p = 0,len = 0;
868   PetscInt          *lrows,*owners = A->rmap->range;
869   PetscSFNode       *rrows;
870   PetscSF           sf;
871   const PetscScalar *xx;
872   PetscScalar       *bb,*mask;
873   Vec               xmask,lmask;
874   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
875   const PetscInt    *aj, *ii,*ridx;
876   PetscScalar       *aa;
877 
878   PetscFunctionBegin;
879   /* Create SF where leaves are input rows and roots are owned rows */
880   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
881   for (r = 0; r < n; ++r) lrows[r] = -1;
882   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
883   for (r = 0; r < N; ++r) {
884     const PetscInt idx   = rows[r];
885     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
886     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
887       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
888     }
889     rrows[r].rank  = p;
890     rrows[r].index = rows[r] - owners[p];
891   }
892   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
893   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
894   /* Collect flags for rows to be zeroed */
895   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
896   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
897   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
898   /* Compress and put in row numbers */
899   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
900   /* zero diagonal part of matrix */
901   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
902   /* handle off diagonal part of matrix */
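  /* build a mask vector with a 1 in every zeroed row (equivalently column, since the row and column
     layouts match for this operation), scatter it into the ghost-column space of B, then zero every
     off-diagonal entry whose global column is masked, subtracting the corresponding x contribution
     from b when x and b are supplied */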
903   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
904   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
905   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
906   for (i=0; i<len; i++) bb[lrows[i]] = 1;
907   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
908   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
909   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
910   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
911   if (x) {
912     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
913     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
914     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
915     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
916   }
917   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
918   /* remove zeroed rows of off diagonal matrix */
919   ii = aij->i;
920   for (i=0; i<len; i++) {
921     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
922   }
923   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
924   if (aij->compressedrow.use) {
925     m    = aij->compressedrow.nrows;
926     ii   = aij->compressedrow.i;
927     ridx = aij->compressedrow.rindex;
928     for (i=0; i<m; i++) {
929       n  = ii[i+1] - ii[i];
930       aj = aij->j + ii[i];
931       aa = aij->a + ii[i];
932 
933       for (j=0; j<n; j++) {
934         if (PetscAbsScalar(mask[*aj])) {
935           if (b) bb[*ridx] -= *aa*xx[*aj];
936           *aa = 0.0;
937         }
938         aa++;
939         aj++;
940       }
941       ridx++;
942     }
943   } else { /* do not use compressed row format */
944     m = l->B->rmap->n;
945     for (i=0; i<m; i++) {
946       n  = ii[i+1] - ii[i];
947       aj = aij->j + ii[i];
948       aa = aij->a + ii[i];
949       for (j=0; j<n; j++) {
950         if (PetscAbsScalar(mask[*aj])) {
951           if (b) bb[i] -= *aa*xx[*aj];
952           *aa = 0.0;
953         }
954         aa++;
955         aj++;
956       }
957     }
958   }
959   if (x) {
960     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
961     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
962   }
963   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
964   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
965   ierr = PetscFree(lrows);CHKERRQ(ierr);
966 
967   /* only change matrix nonzero state if pattern was allowed to be changed */
968   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
969     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
970     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
971   }
972   PetscFunctionReturn(0);
973 }
974 
975 #undef __FUNCT__
976 #define __FUNCT__ "MatMult_MPIAIJ"
977 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
978 {
979   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
980   PetscErrorCode ierr;
981   PetscInt       nt;
982 
983   PetscFunctionBegin;
984   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
985   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
986   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
987   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
988   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
989   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
990   PetscFunctionReturn(0);
991 }
992 
993 #undef __FUNCT__
994 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
995 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
996 {
997   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
998   PetscErrorCode ierr;
999 
1000   PetscFunctionBegin;
1001   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1002   PetscFunctionReturn(0);
1003 }
1004 
1005 #undef __FUNCT__
1006 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1007 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1008 {
1009   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1010   PetscErrorCode ierr;
1011 
1012   PetscFunctionBegin;
1013   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1014   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1015   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1016   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1017   PetscFunctionReturn(0);
1018 }
1019 
1020 #undef __FUNCT__
1021 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1022 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1023 {
1024   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1025   PetscErrorCode ierr;
1026   PetscBool      merged;
1027 
1028   PetscFunctionBegin;
1029   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1030   /* do nondiagonal part */
1031   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1032   if (!merged) {
1033     /* send it on its way */
1034     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1035     /* do local part */
1036     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1037     /* receive remote parts: note this assumes the values are not actually */
1038     /* added in yy until the next line, */
1039     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1040   } else {
1041     /* do local part */
1042     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1043     /* send it on its way */
1044     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1045     /* values actually were received in the Begin() but we need to call this nop */
1046     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1047   }
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1053 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1054 {
1055   MPI_Comm       comm;
1056   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1057   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1058   IS             Me,Notme;
1059   PetscErrorCode ierr;
1060   PetscInt       M,N,first,last,*notme,i;
1061   PetscMPIInt    size;
1062 
1063   PetscFunctionBegin;
1064   /* Easy test: the diagonal block of Bmat must be the transpose of the diagonal block of Amat */
1065   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1066   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1067   if (!*f) PetscFunctionReturn(0);
1068   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1069   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1070   if (size == 1) PetscFunctionReturn(0);
1071 
1072   /* Hard test: off-diagonal block. This takes a MatGetSubMatrices() call. */
1073   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1074   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1075   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1076   for (i=0; i<first; i++) notme[i] = i;
1077   for (i=last; i<M; i++) notme[i-last+first] = i;
1078   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1079   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1080   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1081   Aoff = Aoffs[0];
1082   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1083   Boff = Boffs[0];
1084   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1085   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1086   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1087   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1088   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1089   ierr = PetscFree(notme);CHKERRQ(ierr);
1090   PetscFunctionReturn(0);
1091 }
1092 
1093 #undef __FUNCT__
1094 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1095 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1096 {
1097   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1098   PetscErrorCode ierr;
1099 
1100   PetscFunctionBegin;
1101   /* do nondiagonal part */
1102   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1103   /* send it on its way */
1104   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1105   /* do local part */
1106   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1107   /* receive remote parts */
1108   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 /*
1113   This only works correctly for square matrices where the subblock A->A is the
1114    diagonal block
1115 */
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1118 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1119 {
1120   PetscErrorCode ierr;
1121   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1122 
1123   PetscFunctionBegin;
1124   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1125   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1126   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatScale_MPIAIJ"
1132 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1139   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 #undef __FUNCT__
1144 #define __FUNCT__ "MatDestroy_MPIAIJ"
1145 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1146 {
1147   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1148   PetscErrorCode ierr;
1149 
1150   PetscFunctionBegin;
1151 #if defined(PETSC_USE_LOG)
1152   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1153 #endif
1154   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1155   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1156   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1157   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1158 #if defined(PETSC_USE_CTABLE)
1159   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1160 #else
1161   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1162 #endif
1163   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1164   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1165   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1166   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1167   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1168   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1169 
1170   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1171   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1172   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1173   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1174   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1175   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1176   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1177   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1179 #if defined(PETSC_HAVE_ELEMENTAL)
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1181 #endif
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1187 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1188 {
1189   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1190   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1191   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1192   PetscErrorCode ierr;
1193   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1194   int            fd;
1195   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1196   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1197   PetscScalar    *column_values;
1198   PetscInt       message_count,flowcontrolcount;
1199   FILE           *file;
1200 
1201   PetscFunctionBegin;
1202   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1203   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1204   nz   = A->nz + B->nz;
1205   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1206   if (!rank) {
1207     header[0] = MAT_FILE_CLASSID;
1208     header[1] = mat->rmap->N;
1209     header[2] = mat->cmap->N;
1210 
1211     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1212     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1213     /* get largest number of rows any processor has */
1214     rlen  = mat->rmap->n;
1215     range = mat->rmap->range;
1216     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1217   } else {
1218     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1219     rlen = mat->rmap->n;
1220   }
1221 
1222   /* load up the local row counts */
1223   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1224   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1225 
1226   /* store the row lengths to the file */
1227   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1228   if (!rank) {
1229     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1230     for (i=1; i<size; i++) {
1231       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1232       rlen = range[i+1] - range[i];
1233       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1234       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1235     }
1236     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1237   } else {
1238     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1239     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1240     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1241   }
1242   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1243 
1244   /* load up the local column indices */
1245   nzmax = nz; /* process 0 needs as much buffer space as the largest number of nonzeros on any process */
1246   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1247   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1248   cnt   = 0;
1249   for (i=0; i<mat->rmap->n; i++) {
1250     for (j=B->i[i]; j<B->i[i+1]; j++) {
1251       if ((col = garray[B->j[j]]) > cstart) break;
1252       column_indices[cnt++] = col;
1253     }
1254     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1255     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1256   }
1257   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1258 
1259   /* store the column indices to the file */
1260   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1261   if (!rank) {
1262     MPI_Status status;
1263     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1264     for (i=1; i<size; i++) {
1265       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1266       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1267       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1268       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1269       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1270     }
1271     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1272   } else {
1273     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1274     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1275     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1276     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1277   }
1278   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1279 
1280   /* load up the local column values */
1281   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1282   cnt  = 0;
1283   for (i=0; i<mat->rmap->n; i++) {
1284     for (j=B->i[i]; j<B->i[i+1]; j++) {
1285       if (garray[B->j[j]] > cstart) break;
1286       column_values[cnt++] = B->a[j];
1287     }
1288     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1289     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1290   }
1291   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1292 
1293   /* store the column values to the file */
1294   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1295   if (!rank) {
1296     MPI_Status status;
1297     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1298     for (i=1; i<size; i++) {
1299       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1300       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1301       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1302       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1303       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1304     }
1305     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1306   } else {
1307     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1308     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1309     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1310     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1311   }
1312   ierr = PetscFree(column_values);CHKERRQ(ierr);
1313 
1314   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1315   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1316   PetscFunctionReturn(0);
1317 }
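
/*
   The routine above writes the standard PETSc binary format: a four-entry header
   (MAT_FILE_CLASSID, global rows, global columns, total number of nonzeros), followed by all
   row lengths, then all global column indices, then all values.  A minimal sketch of reading
   such a file back with MatLoad(); the file name "matrix.dat" is only an example:

     PetscViewer    viewer;
     Mat            A;
     PetscErrorCode ierr;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/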
1318 
1319 #include <petscdraw.h>
1320 #undef __FUNCT__
1321 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1322 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1323 {
1324   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1325   PetscErrorCode    ierr;
1326   PetscMPIInt       rank = aij->rank,size = aij->size;
1327   PetscBool         isdraw,iascii,isbinary;
1328   PetscViewer       sviewer;
1329   PetscViewerFormat format;
1330 
1331   PetscFunctionBegin;
1332   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1333   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1334   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1335   if (iascii) {
1336     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1337     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1338       MatInfo   info;
1339       PetscBool inodes;
1340 
1341       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1342       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1343       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1344       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1345       if (!inodes) {
1346         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1347                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1348       } else {
1349         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1350                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1351       }
1352       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1353       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1354       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1355       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1356       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1357       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1358       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1359       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1360       PetscFunctionReturn(0);
1361     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1362       PetscInt inodecount,inodelimit,*inodes;
1363       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1364       if (inodes) {
1365         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1366       } else {
1367         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1368       }
1369       PetscFunctionReturn(0);
1370     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1371       PetscFunctionReturn(0);
1372     }
1373   } else if (isbinary) {
1374     if (size == 1) {
1375       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1376       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1377     } else {
1378       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1379     }
1380     PetscFunctionReturn(0);
1381   } else if (isdraw) {
1382     PetscDraw draw;
1383     PetscBool isnull;
1384     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1385     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1386   }
1387 
1388   {
1389     /* assemble the entire matrix onto the first processor */
1390     Mat        A;
1391     Mat_SeqAIJ *Aloc;
1392     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1393     MatScalar  *a;
1394 
1395     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1396     if (!rank) {
1397       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1398     } else {
1399       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1400     }
1401     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1402     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1403     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1404     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1405     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1406 
1407     /* copy over the A part */
1408     Aloc = (Mat_SeqAIJ*)aij->A->data;
1409     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1410     row  = mat->rmap->rstart;
1411     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1412     for (i=0; i<m; i++) {
1413       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1414       row++;
1415       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1416     }
1417     aj = Aloc->j;
1418     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1419 
1420     /* copy over the B part */
1421     Aloc = (Mat_SeqAIJ*)aij->B->data;
1422     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1423     row  = mat->rmap->rstart;
1424     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1425     ct   = cols;
1426     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1427     for (i=0; i<m; i++) {
1428       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1429       row++;
1430       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1431     }
1432     ierr = PetscFree(ct);CHKERRQ(ierr);
1433     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1434     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1435     /*
1436        Every process has to participate in the viewing call since the graphics waits are
1437        synchronized across all processes that share the PetscDraw object
1438     */
1439     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1440     if (!rank) {
1441       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1442       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1443     }
1444     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1445     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1446     ierr = MatDestroy(&A);CHKERRQ(ierr);
1447   }
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 #undef __FUNCT__
1452 #define __FUNCT__ "MatView_MPIAIJ"
1453 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1454 {
1455   PetscErrorCode ierr;
1456   PetscBool      iascii,isdraw,issocket,isbinary;
1457 
1458   PetscFunctionBegin;
1459   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1460   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1461   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1462   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1463   if (iascii || isdraw || isbinary || issocket) {
1464     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1465   }
1466   PetscFunctionReturn(0);
1467 }
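
/*
   A minimal usage sketch for the viewer dispatch above; A is assumed to be an assembled
   MATMPIAIJ matrix, and pushing PETSC_VIEWER_ASCII_INFO_DETAIL selects the per-process
   output produced by MatView_MPIAIJ_ASCIIorDraworSocket():

     PetscErrorCode ierr;

     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/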
1468 
1469 #undef __FUNCT__
1470 #define __FUNCT__ "MatSOR_MPIAIJ"
1471 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1472 {
1473   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1474   PetscErrorCode ierr;
1475   Vec            bb1 = 0;
1476   PetscBool      hasop;
1477 
1478   PetscFunctionBegin;
1479   if (flag == SOR_APPLY_UPPER) {
1480     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1481     PetscFunctionReturn(0);
1482   }
1483 
1484   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1485     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1486   }
1487 
1488   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1489     if (flag & SOR_ZERO_INITIAL_GUESS) {
1490       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1491       its--;
1492     }
1493 
1494     while (its--) {
1495       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1496       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1497 
1498       /* update rhs: bb1 = bb - B*x */
1499       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1500       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1501 
1502       /* local sweep */
1503       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1504     }
1505   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1506     if (flag & SOR_ZERO_INITIAL_GUESS) {
1507       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1508       its--;
1509     }
1510     while (its--) {
1511       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1512       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1513 
1514       /* update rhs: bb1 = bb - B*x */
1515       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1516       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1517 
1518       /* local sweep */
1519       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1520     }
1521   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1522     if (flag & SOR_ZERO_INITIAL_GUESS) {
1523       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1524       its--;
1525     }
1526     while (its--) {
1527       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1528       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1529 
1530       /* update rhs: bb1 = bb - B*x */
1531       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1532       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1533 
1534       /* local sweep */
1535       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1536     }
1537   } else if (flag & SOR_EISENSTAT) {
1538     Vec xx1;
1539 
1540     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1541     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1542 
1543     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1544     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1545     if (!mat->diag) {
1546       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1547       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1548     }
1549     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1550     if (hasop) {
1551       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1552     } else {
1553       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1554     }
1555     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1556 
1557     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1558 
1559     /* local sweep */
1560     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1561     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1562     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1563   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1564 
1565   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1566   PetscFunctionReturn(0);
1567 }
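
/*
   Note on the routine above: only processor-local relaxation is provided.  Each outer iteration
   gathers the ghost values of xx into mat->lvec, forms the modified right-hand side
   bb1 = bb - B*x using the off-diagonal block B, and then runs the requested sequential sweep on
   the diagonal block A.  A minimal sketch of reaching this code through the solver interface,
   assuming A is an assembled MATMPIAIJ matrix:

     KSP            ksp;
     PC             pc;
     PetscErrorCode ierr;

     ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
     ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);
*/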
1568 
1569 #undef __FUNCT__
1570 #define __FUNCT__ "MatPermute_MPIAIJ"
1571 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1572 {
1573   Mat            aA,aB,Aperm;
1574   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1575   PetscScalar    *aa,*ba;
1576   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1577   PetscSF        rowsf,sf;
1578   IS             parcolp = NULL;
1579   PetscBool      done;
1580   PetscErrorCode ierr;
1581 
1582   PetscFunctionBegin;
1583   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1584   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1585   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1586   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1587 
1588   /* Invert row permutation to find out where my rows should go */
1589   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1590   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1591   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1592   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1593   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1594   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1595 
1596   /* Invert column permutation to find out where my columns should go */
1597   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1598   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1599   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1600   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1601   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1602   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1603   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1604 
1605   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1606   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1607   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1608 
1609   /* Find out where my gcols should go */
1610   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1611   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1612   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1613   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1614   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1615   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1616   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1617   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1618 
1619   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1620   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1621   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1622   for (i=0; i<m; i++) {
1623     PetscInt row = rdest[i],rowner;
1624     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1625     for (j=ai[i]; j<ai[i+1]; j++) {
1626       PetscInt cowner,col = cdest[aj[j]];
1627       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1628       if (rowner == cowner) dnnz[i]++;
1629       else onnz[i]++;
1630     }
1631     for (j=bi[i]; j<bi[i+1]; j++) {
1632       PetscInt cowner,col = gcdest[bj[j]];
1633       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1634       if (rowner == cowner) dnnz[i]++;
1635       else onnz[i]++;
1636     }
1637   }
1638   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1639   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1640   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1641   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1642   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1643 
1644   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1645   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1646   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1647   for (i=0; i<m; i++) {
1648     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1649     PetscInt j0,rowlen;
1650     rowlen = ai[i+1] - ai[i];
1651     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of rows m, so insert in batches of at most m columns */
1652       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1653       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1654     }
1655     rowlen = bi[i+1] - bi[i];
1656     for (j0=j=0; j<rowlen; j0=j) {
1657       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1658       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1659     }
1660   }
1661   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1662   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1663   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1664   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1665   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1666   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1667   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1668   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1669   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1670   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1671   *B = Aperm;
1672   PetscFunctionReturn(0);
1673 }
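
/*
   A minimal usage sketch for the permutation routine above; rowperm and colperm are assumed to be
   index sets of global indices with the same parallel layout as the rows and columns of A
   (created elsewhere, e.g. with ISCreateGeneral()):

     Mat            Aperm;
     PetscErrorCode ierr;

     ierr = MatPermute(A,rowperm,colperm,&Aperm);CHKERRQ(ierr);
     ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
*/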
1674 
1675 #undef __FUNCT__
1676 #define __FUNCT__ "MatGetGhosts_MPIAIJ"
1677 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1678 {
1679   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1680   PetscErrorCode ierr;
1681 
1682   PetscFunctionBegin;
1683   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1684   if (ghosts) *ghosts = aij->garray;
1685   PetscFunctionReturn(0);
1686 }
1687 
1688 #undef __FUNCT__
1689 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1690 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1691 {
1692   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1693   Mat            A    = mat->A,B = mat->B;
1694   PetscErrorCode ierr;
1695   PetscReal      isend[5],irecv[5];
1696 
1697   PetscFunctionBegin;
1698   info->block_size = 1.0;
1699   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1700 
1701   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1702   isend[3] = info->memory;  isend[4] = info->mallocs;
1703 
1704   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1705 
1706   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1707   isend[3] += info->memory;  isend[4] += info->mallocs;
1708   if (flag == MAT_LOCAL) {
1709     info->nz_used      = isend[0];
1710     info->nz_allocated = isend[1];
1711     info->nz_unneeded  = isend[2];
1712     info->memory       = isend[3];
1713     info->mallocs      = isend[4];
1714   } else if (flag == MAT_GLOBAL_MAX) {
1715     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1716 
1717     info->nz_used      = irecv[0];
1718     info->nz_allocated = irecv[1];
1719     info->nz_unneeded  = irecv[2];
1720     info->memory       = irecv[3];
1721     info->mallocs      = irecv[4];
1722   } else if (flag == MAT_GLOBAL_SUM) {
1723     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1724 
1725     info->nz_used      = irecv[0];
1726     info->nz_allocated = irecv[1];
1727     info->nz_unneeded  = irecv[2];
1728     info->memory       = irecv[3];
1729     info->mallocs      = irecv[4];
1730   }
1731   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1732   info->fill_ratio_needed = 0;
1733   info->factor_mallocs    = 0;
1734   PetscFunctionReturn(0);
1735 }
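
/*
   A minimal usage sketch for the statistics gathered above; MAT_GLOBAL_SUM and MAT_GLOBAL_MAX
   trigger the reductions coded in the routine, while MAT_LOCAL returns the per-process numbers:

     MatInfo        info;
     PetscErrorCode ierr;

     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g, mallocs %g\n",info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/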
1736 
1737 #undef __FUNCT__
1738 #define __FUNCT__ "MatSetOption_MPIAIJ"
1739 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1740 {
1741   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1742   PetscErrorCode ierr;
1743 
1744   PetscFunctionBegin;
1745   switch (op) {
1746   case MAT_NEW_NONZERO_LOCATIONS:
1747   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1748   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1749   case MAT_KEEP_NONZERO_PATTERN:
1750   case MAT_NEW_NONZERO_LOCATION_ERR:
1751   case MAT_USE_INODES:
1752   case MAT_IGNORE_ZERO_ENTRIES:
1753     MatCheckPreallocated(A,1);
1754     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1755     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1756     break;
1757   case MAT_ROW_ORIENTED:
1758     a->roworiented = flg;
1759 
1760     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1761     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1762     break;
1763   case MAT_NEW_DIAGONALS:
1764     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1765     break;
1766   case MAT_IGNORE_OFF_PROC_ENTRIES:
1767     a->donotstash = flg;
1768     break;
1769   case MAT_SPD:
1770     A->spd_set = PETSC_TRUE;
1771     A->spd     = flg;
1772     if (flg) {
1773       A->symmetric                  = PETSC_TRUE;
1774       A->structurally_symmetric     = PETSC_TRUE;
1775       A->symmetric_set              = PETSC_TRUE;
1776       A->structurally_symmetric_set = PETSC_TRUE;
1777     }
1778     break;
1779   case MAT_SYMMETRIC:
1780     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1781     break;
1782   case MAT_STRUCTURALLY_SYMMETRIC:
1783     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1784     break;
1785   case MAT_HERMITIAN:
1786     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1787     break;
1788   case MAT_SYMMETRY_ETERNAL:
1789     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1790     break;
1791   default:
1792     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1793   }
1794   PetscFunctionReturn(0);
1795 }
1796 
1797 #undef __FUNCT__
1798 #define __FUNCT__ "MatGetRow_MPIAIJ"
1799 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1800 {
1801   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1802   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1803   PetscErrorCode ierr;
1804   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1805   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1806   PetscInt       *cmap,*idx_p;
1807 
1808   PetscFunctionBegin;
1809   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1810   mat->getrowactive = PETSC_TRUE;
1811 
1812   if (!mat->rowvalues && (idx || v)) {
1813     /*
1814         allocate enough space to hold information from the longest row.
1815     */
1816     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1817     PetscInt   max = 1,tmp;
1818     for (i=0; i<matin->rmap->n; i++) {
1819       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1820       if (max < tmp) max = tmp;
1821     }
1822     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1823   }
1824 
1825   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1826   lrow = row - rstart;
1827 
1828   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1829   if (!v)   {pvA = 0; pvB = 0;}
1830   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1831   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1832   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1833   nztot = nzA + nzB;
1834 
1835   cmap = mat->garray;
1836   if (v  || idx) {
1837     if (nztot) {
1838       /* Sort by increasing column numbers, assuming A and B already sorted */
1839       PetscInt imark = -1;
1840       if (v) {
1841         *v = v_p = mat->rowvalues;
1842         for (i=0; i<nzB; i++) {
1843           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1844           else break;
1845         }
1846         imark = i;
1847         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1848         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1849       }
1850       if (idx) {
1851         *idx = idx_p = mat->rowindices;
1852         if (imark > -1) {
1853           for (i=0; i<imark; i++) {
1854             idx_p[i] = cmap[cworkB[i]];
1855           }
1856         } else {
1857           for (i=0; i<nzB; i++) {
1858             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1859             else break;
1860           }
1861           imark = i;
1862         }
1863         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1864         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1865       }
1866     } else {
1867       if (idx) *idx = 0;
1868       if (v)   *v   = 0;
1869     }
1870   }
1871   *nz  = nztot;
1872   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1873   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1874   PetscFunctionReturn(0);
1875 }
1876 
1877 #undef __FUNCT__
1878 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1879 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1880 {
1881   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1882 
1883   PetscFunctionBegin;
1884   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1885   aij->getrowactive = PETSC_FALSE;
1886   PetscFunctionReturn(0);
1887 }
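
/*
   A minimal sketch of the row access pattern served by the two routines above; only locally owned
   rows may be requested, and every MatGetRow() call must be matched by MatRestoreRow():

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscErrorCode    ierr;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/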
1888 
1889 #undef __FUNCT__
1890 #define __FUNCT__ "MatNorm_MPIAIJ"
1891 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1892 {
1893   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1894   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1895   PetscErrorCode ierr;
1896   PetscInt       i,j,cstart = mat->cmap->rstart;
1897   PetscReal      sum = 0.0;
1898   MatScalar      *v;
1899 
1900   PetscFunctionBegin;
1901   if (aij->size == 1) {
1902     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1903   } else {
1904     if (type == NORM_FROBENIUS) {
1905       v = amat->a;
1906       for (i=0; i<amat->nz; i++) {
1907         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1908       }
1909       v = bmat->a;
1910       for (i=0; i<bmat->nz; i++) {
1911         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1912       }
1913       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1914       *norm = PetscSqrtReal(*norm);
1915     } else if (type == NORM_1) { /* max column norm */
1916       PetscReal *tmp,*tmp2;
1917       PetscInt  *jj,*garray = aij->garray;
1918       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1919       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1920       *norm = 0.0;
1921       v     = amat->a; jj = amat->j;
1922       for (j=0; j<amat->nz; j++) {
1923         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1924       }
1925       v = bmat->a; jj = bmat->j;
1926       for (j=0; j<bmat->nz; j++) {
1927         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1928       }
1929       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1930       for (j=0; j<mat->cmap->N; j++) {
1931         if (tmp2[j] > *norm) *norm = tmp2[j];
1932       }
1933       ierr = PetscFree(tmp);CHKERRQ(ierr);
1934       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1935     } else if (type == NORM_INFINITY) { /* max row norm */
1936       PetscReal ntemp = 0.0;
1937       for (j=0; j<aij->A->rmap->n; j++) {
1938         v   = amat->a + amat->i[j];
1939         sum = 0.0;
1940         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1941           sum += PetscAbsScalar(*v); v++;
1942         }
1943         v = bmat->a + bmat->i[j];
1944         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1945           sum += PetscAbsScalar(*v); v++;
1946         }
1947         if (sum > ntemp) ntemp = sum;
1948       }
1949       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1950     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1951   }
1952   PetscFunctionReturn(0);
1953 }
1954 
1955 #undef __FUNCT__
1956 #define __FUNCT__ "MatTranspose_MPIAIJ"
1957 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1958 {
1959   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1960   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1961   PetscErrorCode ierr;
1962   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1963   PetscInt       cstart = A->cmap->rstart,ncol;
1964   Mat            B;
1965   MatScalar      *array;
1966 
1967   PetscFunctionBegin;
1968   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1969 
1970   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1971   ai = Aloc->i; aj = Aloc->j;
1972   bi = Bloc->i; bj = Bloc->j;
1973   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1974     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1975     PetscSFNode          *oloc;
1976     PETSC_UNUSED PetscSF sf;
1977 
1978     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1979     /* compute d_nnz for preallocation */
1980     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1981     for (i=0; i<ai[ma]; i++) {
1982       d_nnz[aj[i]]++;
1983       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1984     }
1985     /* compute local off-diagonal contributions */
1986     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1987     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1988     /* map those to global */
1989     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1990     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1991     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1992     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1993     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1994     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1995     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1996 
1997     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1998     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1999     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2000     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2001     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2002     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2003   } else {
2004     B    = *matout;
2005     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2006     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2007   }
2008 
2009   /* copy over the A part */
2010   array = Aloc->a;
2011   row   = A->rmap->rstart;
2012   for (i=0; i<ma; i++) {
2013     ncol = ai[i+1]-ai[i];
2014     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2015     row++;
2016     array += ncol; aj += ncol;
2017   }
2018   aj = Aloc->j;
2019   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col index */
2020 
2021   /* copy over the B part */
2022   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2023   array = Bloc->a;
2024   row   = A->rmap->rstart;
2025   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2026   cols_tmp = cols;
2027   for (i=0; i<mb; i++) {
2028     ncol = bi[i+1]-bi[i];
2029     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2030     row++;
2031     array += ncol; cols_tmp += ncol;
2032   }
2033   ierr = PetscFree(cols);CHKERRQ(ierr);
2034 
2035   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2036   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2037   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2038     *matout = B;
2039   } else {
2040     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2041   }
2042   PetscFunctionReturn(0);
2043 }
2044 
2045 #undef __FUNCT__
2046 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2047 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2048 {
2049   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2050   Mat            a    = aij->A,b = aij->B;
2051   PetscErrorCode ierr;
2052   PetscInt       s1,s2,s3;
2053 
2054   PetscFunctionBegin;
2055   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2056   if (rr) {
2057     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2058     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2059     /* Overlap communication with computation. */
2060     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2061   }
2062   if (ll) {
2063     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2064     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2065     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2066   }
2067   /* scale the diagonal block */
2068   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2069 
2070   if (rr) {
2071     /* Do a scatter end and then right scale the off-diagonal block */
2072     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2073     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2074   }
2075   PetscFunctionReturn(0);
2076 }
2077 
2078 #undef __FUNCT__
2079 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2080 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2081 {
2082   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2083   PetscErrorCode ierr;
2084 
2085   PetscFunctionBegin;
2086   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2087   PetscFunctionReturn(0);
2088 }
2089 
2090 #undef __FUNCT__
2091 #define __FUNCT__ "MatEqual_MPIAIJ"
2092 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2093 {
2094   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2095   Mat            a,b,c,d;
2096   PetscBool      flg;
2097   PetscErrorCode ierr;
2098 
2099   PetscFunctionBegin;
2100   a = matA->A; b = matA->B;
2101   c = matB->A; d = matB->B;
2102 
2103   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2104   if (flg) {
2105     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2106   }
2107   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2108   PetscFunctionReturn(0);
2109 }
2110 
2111 #undef __FUNCT__
2112 #define __FUNCT__ "MatCopy_MPIAIJ"
2113 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2114 {
2115   PetscErrorCode ierr;
2116   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2117   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2118 
2119   PetscFunctionBegin;
2120   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2121   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2122     /* Because of the column compression in the off-processor part of the matrix a->B,
2123        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
2124        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
2125        could be provided by first uncompressing the a->B matrices and then copying the
2126        submatrices */
2127     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2128   } else {
2129     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2130     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2131   }
2132   PetscFunctionReturn(0);
2133 }
2134 
2135 #undef __FUNCT__
2136 #define __FUNCT__ "MatSetUp_MPIAIJ"
2137 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2138 {
2139   PetscErrorCode ierr;
2140 
2141   PetscFunctionBegin;
2142   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2143   PetscFunctionReturn(0);
2144 }
2145 
2146 /*
2147    Computes the number of nonzeros per row needed for preallocation when X and Y
2148    have different nonzero structure.
2149 */
2150 #undef __FUNCT__
2151 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2152 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2153 {
2154   PetscInt       i,j,k,nzx,nzy;
2155 
2156   PetscFunctionBegin;
2157   /* Set the number of nonzeros in the new matrix */
2158   for (i=0; i<m; i++) {
2159     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2160     nzx = xi[i+1] - xi[i];
2161     nzy = yi[i+1] - yi[i];
2162     nnz[i] = 0;
2163     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2164       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2165       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2166       nnz[i]++;
2167     }
2168     for (; k<nzy; k++) nnz[i]++;
2169   }
2170   PetscFunctionReturn(0);
2171 }
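
/*
   Worked example for the merge loop above: if row i of X has global columns {1,4,7} and row i of Y
   has global columns {2,4}, the merged pattern is {1,2,4,7}, so nnz[i] = 4; the shared column 4 is
   counted only once thanks to the duplicate-skip branch.
*/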
2172 
2173 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2174 #undef __FUNCT__
2175 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2176 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2177 {
2178   PetscErrorCode ierr;
2179   PetscInt       m = Y->rmap->N;
2180   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2181   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2182 
2183   PetscFunctionBegin;
2184   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2185   PetscFunctionReturn(0);
2186 }
2187 
2188 #undef __FUNCT__
2189 #define __FUNCT__ "MatAXPY_MPIAIJ"
2190 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2191 {
2192   PetscErrorCode ierr;
2193   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2194   PetscBLASInt   bnz,one=1;
2195   Mat_SeqAIJ     *x,*y;
2196 
2197   PetscFunctionBegin;
2198   if (str == SAME_NONZERO_PATTERN) {
2199     PetscScalar alpha = a;
2200     x    = (Mat_SeqAIJ*)xx->A->data;
2201     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2202     y    = (Mat_SeqAIJ*)yy->A->data;
2203     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2204     x    = (Mat_SeqAIJ*)xx->B->data;
2205     y    = (Mat_SeqAIJ*)yy->B->data;
2206     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2207     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2208     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2209   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */
2210     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2211   } else {
2212     Mat      B;
2213     PetscInt *nnz_d,*nnz_o;
2214     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2215     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2216     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2217     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2218     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2219     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2220     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2221     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2222     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2223     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2224     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2225     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2226     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2227     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2228   }
2229   PetscFunctionReturn(0);
2230 }
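
/*
   A minimal usage sketch for the routine above; Y and X are assumed to be assembled MATMPIAIJ
   matrices of the same global size, and the MatStructure argument selects among the three branches
   coded above:

     PetscErrorCode ierr;

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/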
2231 
2232 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2233 
2234 #undef __FUNCT__
2235 #define __FUNCT__ "MatConjugate_MPIAIJ"
2236 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2237 {
2238 #if defined(PETSC_USE_COMPLEX)
2239   PetscErrorCode ierr;
2240   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2241 
2242   PetscFunctionBegin;
2243   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2244   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2245 #else
2246   PetscFunctionBegin;
2247 #endif
2248   PetscFunctionReturn(0);
2249 }
2250 
2251 #undef __FUNCT__
2252 #define __FUNCT__ "MatRealPart_MPIAIJ"
2253 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2254 {
2255   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2256   PetscErrorCode ierr;
2257 
2258   PetscFunctionBegin;
2259   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2260   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2261   PetscFunctionReturn(0);
2262 }
2263 
2264 #undef __FUNCT__
2265 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2266 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2267 {
2268   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2269   PetscErrorCode ierr;
2270 
2271   PetscFunctionBegin;
2272   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2273   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2274   PetscFunctionReturn(0);
2275 }
2276 
2277 #undef __FUNCT__
2278 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2279 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2280 {
2281   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2282   PetscErrorCode ierr;
2283   PetscInt       i,*idxb = 0;
2284   PetscScalar    *va,*vb;
2285   Vec            vtmp;
2286 
2287   PetscFunctionBegin;
2288   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2289   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2290   if (idx) {
2291     for (i=0; i<A->rmap->n; i++) {
2292       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2293     }
2294   }
2295 
2296   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2297   if (idx) {
2298     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2299   }
2300   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2301   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2302 
2303   for (i=0; i<A->rmap->n; i++) {
2304     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2305       va[i] = vb[i];
2306       if (idx) idx[i] = a->garray[idxb[i]];
2307     }
2308   }
2309 
2310   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2311   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2312   ierr = PetscFree(idxb);CHKERRQ(ierr);
2313   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2314   PetscFunctionReturn(0);
2315 }
2316 
2317 #undef __FUNCT__
2318 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2319 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2320 {
2321   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2322   PetscErrorCode ierr;
2323   PetscInt       i,*idxb = 0;
2324   PetscScalar    *va,*vb;
2325   Vec            vtmp;
2326 
2327   PetscFunctionBegin;
2328   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2329   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2330   if (idx) {
2331     for (i=0; i<A->rmap->n; i++) {
2332       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2333     }
2334   }
2335 
2336   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2337   if (idx) {
2338     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2339   }
2340   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2341   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2342 
2343   for (i=0; i<A->rmap->n; i++) {
2344     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2345       va[i] = vb[i];
2346       if (idx) idx[i] = a->garray[idxb[i]];
2347     }
2348   }
2349 
2350   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2351   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2352   ierr = PetscFree(idxb);CHKERRQ(ierr);
2353   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2354   PetscFunctionReturn(0);
2355 }
2356 
2357 #undef __FUNCT__
2358 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2359 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2360 {
2361   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2362   PetscInt       n      = A->rmap->n;
2363   PetscInt       cstart = A->cmap->rstart;
2364   PetscInt       *cmap  = mat->garray;
2365   PetscInt       *diagIdx, *offdiagIdx;
2366   Vec            diagV, offdiagV;
2367   PetscScalar    *a, *diagA, *offdiagA;
2368   PetscInt       r;
2369   PetscErrorCode ierr;
2370 
2371   PetscFunctionBegin;
2372   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2373   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2374   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2375   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2376   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2377   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2378   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2379   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2380   for (r = 0; r < n; ++r) {
2381     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2382       a[r]   = diagA[r];
2383       idx[r] = cstart + diagIdx[r];
2384     } else {
2385       a[r]   = offdiagA[r];
2386       idx[r] = cmap[offdiagIdx[r]];
2387     }
2388   }
2389   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2390   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2391   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2392   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2393   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2394   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2395   PetscFunctionReturn(0);
2396 }
2397 
2398 #undef __FUNCT__
2399 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2400 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2401 {
2402   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2403   PetscInt       n      = A->rmap->n;
2404   PetscInt       cstart = A->cmap->rstart;
2405   PetscInt       *cmap  = mat->garray;
2406   PetscInt       *diagIdx, *offdiagIdx;
2407   Vec            diagV, offdiagV;
2408   PetscScalar    *a, *diagA, *offdiagA;
2409   PetscInt       r;
2410   PetscErrorCode ierr;
2411 
2412   PetscFunctionBegin;
2413   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2414   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2415   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2416   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2417   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2418   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2419   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2420   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2421   for (r = 0; r < n; ++r) {
2422     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2423       a[r]   = diagA[r];
2424       idx[r] = cstart + diagIdx[r];
2425     } else {
2426       a[r]   = offdiagA[r];
2427       idx[r] = cmap[offdiagIdx[r]];
2428     }
2429   }
2430   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2431   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2432   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2433   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2434   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2435   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2436   PetscFunctionReturn(0);
2437 }
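
/*
   A minimal usage sketch for the row max/min routines above; v must share the row layout of A and
   idx[] receives the global column index of the selected entry in each local row:

     Vec            rmax;
     PetscInt       *idx,nlocal;
     PetscErrorCode ierr;

     ierr = MatCreateVecs(A,NULL,&rmax);CHKERRQ(ierr);
     ierr = MatGetLocalSize(A,&nlocal,NULL);CHKERRQ(ierr);
     ierr = PetscMalloc1(nlocal,&idx);CHKERRQ(ierr);
     ierr = MatGetRowMax(A,rmax,idx);CHKERRQ(ierr);
     ierr = PetscFree(idx);CHKERRQ(ierr);
     ierr = VecDestroy(&rmax);CHKERRQ(ierr);
*/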
2438 
2439 #undef __FUNCT__
2440 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2441 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2442 {
2443   PetscErrorCode ierr;
2444   Mat            *dummy;
2445 
2446   PetscFunctionBegin;
2447   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2448   *newmat = *dummy;
2449   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2450   PetscFunctionReturn(0);
2451 }
2452 
2453 #undef __FUNCT__
2454 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2455 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2456 {
2457   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2458   PetscErrorCode ierr;
2459 
2460   PetscFunctionBegin;
2461   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2462   PetscFunctionReturn(0);
2463 }
2464 
2465 #undef __FUNCT__
2466 #define __FUNCT__ "MatSetRandom_MPIAIJ"
2467 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2468 {
2469   PetscErrorCode ierr;
2470   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2471 
2472   PetscFunctionBegin;
2473   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2474   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2475   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2476   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2477   PetscFunctionReturn(0);
2478 }
2479 
2480 #undef __FUNCT__
2481 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ"
2482 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2483 {
2484   PetscFunctionBegin;
2485   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2486   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2487   PetscFunctionReturn(0);
2488 }
2489 
2490 #undef __FUNCT__
2491 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap"
2492 /*@
2493    MatMPIAIJSetUseScalableIncreaseOverlap - Determine whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2494 
2495    Collective on Mat
2496 
2497    Input Parameters:
2498 +    A - the matrix
2499 -    sc - PETSC_TRUE to use the scalable algorithm (by default the non-scalable algorithm is used)
2500 
2501 @*/
2502 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2503 {
2504   PetscErrorCode       ierr;
2505 
2506   PetscFunctionBegin;
2507   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2508   PetscFunctionReturn(0);
2509 }
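
/*
   A minimal usage sketch; the same switch is also reachable from the options database via
   -mat_increase_overlap_scalable (see MatSetFromOptions_MPIAIJ() below):

     PetscErrorCode ierr;

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
*/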
2510 
2511 #undef __FUNCT__
2512 #define __FUNCT__ "MatSetFromOptions_MPIAIJ"
2513 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptions *PetscOptionsObject,Mat A)
2514 {
2515   PetscErrorCode       ierr;
2516   PetscBool            sc = PETSC_FALSE,flg;
2517 
2518   PetscFunctionBegin;
2519   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2520   ierr = PetscObjectOptionsBegin((PetscObject)A);
2521     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2522     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2523     if (flg) {
2524       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2525     }
2526   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2527   PetscFunctionReturn(0);
2528 }
2529 
2530 #undef __FUNCT__
2531 #define __FUNCT__ "MatShift_MPIAIJ"
2532 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2533 {
2534   PetscErrorCode ierr;
2535   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2536   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2537 
2538   PetscFunctionBegin;
2539   if (!Y->preallocated) {
2540     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2541   } else if (!aij->nz) {
2542     PetscInt nonew = aij->nonew;
2543     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2544     aij->nonew = nonew;
2545   }
2546   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2547   PetscFunctionReturn(0);
2548 }
2549 
2550 #undef __FUNCT__
2551 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ"
2552 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2553 {
2554   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2555   PetscErrorCode ierr;
2556 
2557   PetscFunctionBegin;
2558   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2559   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2560   if (d) {
2561     PetscInt rstart;
2562     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2563     *d += rstart;
2564 
2565   }
2566   PetscFunctionReturn(0);
2567 }
2568 
2569 
2570 /* -------------------------------------------------------------------*/
2571 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2572                                        MatGetRow_MPIAIJ,
2573                                        MatRestoreRow_MPIAIJ,
2574                                        MatMult_MPIAIJ,
2575                                 /* 4*/ MatMultAdd_MPIAIJ,
2576                                        MatMultTranspose_MPIAIJ,
2577                                        MatMultTransposeAdd_MPIAIJ,
2578                                        0,
2579                                        0,
2580                                        0,
2581                                 /*10*/ 0,
2582                                        0,
2583                                        0,
2584                                        MatSOR_MPIAIJ,
2585                                        MatTranspose_MPIAIJ,
2586                                 /*15*/ MatGetInfo_MPIAIJ,
2587                                        MatEqual_MPIAIJ,
2588                                        MatGetDiagonal_MPIAIJ,
2589                                        MatDiagonalScale_MPIAIJ,
2590                                        MatNorm_MPIAIJ,
2591                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2592                                        MatAssemblyEnd_MPIAIJ,
2593                                        MatSetOption_MPIAIJ,
2594                                        MatZeroEntries_MPIAIJ,
2595                                 /*24*/ MatZeroRows_MPIAIJ,
2596                                        0,
2597                                        0,
2598                                        0,
2599                                        0,
2600                                 /*29*/ MatSetUp_MPIAIJ,
2601                                        0,
2602                                        0,
2603                                        0,
2604                                        0,
2605                                 /*34*/ MatDuplicate_MPIAIJ,
2606                                        0,
2607                                        0,
2608                                        0,
2609                                        0,
2610                                 /*39*/ MatAXPY_MPIAIJ,
2611                                        MatGetSubMatrices_MPIAIJ,
2612                                        MatIncreaseOverlap_MPIAIJ,
2613                                        MatGetValues_MPIAIJ,
2614                                        MatCopy_MPIAIJ,
2615                                 /*44*/ MatGetRowMax_MPIAIJ,
2616                                        MatScale_MPIAIJ,
2617                                        MatShift_MPIAIJ,
2618                                        MatDiagonalSet_MPIAIJ,
2619                                        MatZeroRowsColumns_MPIAIJ,
2620                                 /*49*/ MatSetRandom_MPIAIJ,
2621                                        0,
2622                                        0,
2623                                        0,
2624                                        0,
2625                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2626                                        0,
2627                                        MatSetUnfactored_MPIAIJ,
2628                                        MatPermute_MPIAIJ,
2629                                        0,
2630                                 /*59*/ MatGetSubMatrix_MPIAIJ,
2631                                        MatDestroy_MPIAIJ,
2632                                        MatView_MPIAIJ,
2633                                        0,
2634                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2635                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2636                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2637                                        0,
2638                                        0,
2639                                        0,
2640                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2641                                        MatGetRowMinAbs_MPIAIJ,
2642                                        0,
2643                                        MatSetColoring_MPIAIJ,
2644                                        0,
2645                                        MatSetValuesAdifor_MPIAIJ,
2646                                 /*75*/ MatFDColoringApply_AIJ,
2647                                        MatSetFromOptions_MPIAIJ,
2648                                        0,
2649                                        0,
2650                                        MatFindZeroDiagonals_MPIAIJ,
2651                                 /*80*/ 0,
2652                                        0,
2653                                        0,
2654                                 /*83*/ MatLoad_MPIAIJ,
2655                                        0,
2656                                        0,
2657                                        0,
2658                                        0,
2659                                        0,
2660                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2661                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2662                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2663                                        MatPtAP_MPIAIJ_MPIAIJ,
2664                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2665                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2666                                        0,
2667                                        0,
2668                                        0,
2669                                        0,
2670                                 /*99*/ 0,
2671                                        0,
2672                                        0,
2673                                        MatConjugate_MPIAIJ,
2674                                        0,
2675                                 /*104*/MatSetValuesRow_MPIAIJ,
2676                                        MatRealPart_MPIAIJ,
2677                                        MatImaginaryPart_MPIAIJ,
2678                                        0,
2679                                        0,
2680                                 /*109*/0,
2681                                        0,
2682                                        MatGetRowMin_MPIAIJ,
2683                                        0,
2684                                        MatMissingDiagonal_MPIAIJ,
2685                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2686                                        0,
2687                                        MatGetGhosts_MPIAIJ,
2688                                        0,
2689                                        0,
2690                                 /*119*/0,
2691                                        0,
2692                                        0,
2693                                        0,
2694                                        MatGetMultiProcBlock_MPIAIJ,
2695                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2696                                        MatGetColumnNorms_MPIAIJ,
2697                                        MatInvertBlockDiagonal_MPIAIJ,
2698                                        0,
2699                                        MatGetSubMatricesMPI_MPIAIJ,
2700                                 /*129*/0,
2701                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2702                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2703                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2704                                        0,
2705                                 /*134*/0,
2706                                        0,
2707                                        0,
2708                                        0,
2709                                        0,
2710                                 /*139*/0,
2711                                        0,
2712                                        0,
2713                                        MatFDColoringSetUp_MPIXAIJ,
2714                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2715                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2716 };
2717 
2718 /* ----------------------------------------------------------------------------------------*/
2719 
2720 #undef __FUNCT__
2721 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2722 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2723 {
2724   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2725   PetscErrorCode ierr;
2726 
2727   PetscFunctionBegin;
2728   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2729   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2730   PetscFunctionReturn(0);
2731 }
2732 
2733 #undef __FUNCT__
2734 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2735 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2736 {
2737   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2738   PetscErrorCode ierr;
2739 
2740   PetscFunctionBegin;
2741   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2742   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2743   PetscFunctionReturn(0);
2744 }
2745 
2746 #undef __FUNCT__
2747 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2748 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2749 {
2750   Mat_MPIAIJ     *b;
2751   PetscErrorCode ierr;
2752 
2753   PetscFunctionBegin;
2754   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2755   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2756   b = (Mat_MPIAIJ*)B->data;
2757 
2758   if (!B->preallocated) {
2759     /* Explicitly create 2 MATSEQAIJ matrices. */
2760     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2761     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2762     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2763     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2764     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2765     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2766     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2767     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2768     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2769     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2770   }
2771 
2772   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2773   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2774   B->preallocated = PETSC_TRUE;
2775   PetscFunctionReturn(0);
2776 }
2777 
2778 #undef __FUNCT__
2779 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2780 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2781 {
2782   Mat            mat;
2783   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2784   PetscErrorCode ierr;
2785 
2786   PetscFunctionBegin;
2787   *newmat = 0;
2788   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2789   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2790   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2791   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2792   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2793   a       = (Mat_MPIAIJ*)mat->data;
2794 
2795   mat->factortype   = matin->factortype;
2796   mat->assembled    = PETSC_TRUE;
2797   mat->insertmode   = NOT_SET_VALUES;
2798   mat->preallocated = PETSC_TRUE;
2799 
2800   a->size         = oldmat->size;
2801   a->rank         = oldmat->rank;
2802   a->donotstash   = oldmat->donotstash;
2803   a->roworiented  = oldmat->roworiented;
2804   a->rowindices   = 0;
2805   a->rowvalues    = 0;
2806   a->getrowactive = PETSC_FALSE;
2807 
2808   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2809   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2810 
2811   if (oldmat->colmap) {
2812 #if defined(PETSC_USE_CTABLE)
2813     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2814 #else
2815     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2816     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2817     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2818 #endif
2819   } else a->colmap = 0;
2820   if (oldmat->garray) {
2821     PetscInt len;
2822     len  = oldmat->B->cmap->n;
2823     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2824     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2825     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2826   } else a->garray = 0;
2827 
2828   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2829   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2830   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2831   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2832   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2833   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2834   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2835   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2836   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2837   *newmat = mat;
2838   PetscFunctionReturn(0);
2839 }
2840 
2841 
2842 
2843 #undef __FUNCT__
2844 #define __FUNCT__ "MatLoad_MPIAIJ"
2845 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2846 {
2847   PetscScalar    *vals,*svals;
2848   MPI_Comm       comm;
2849   PetscErrorCode ierr;
2850   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2851   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2852   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2853   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2854   PetscInt       cend,cstart,n,*rowners;
2855   int            fd;
2856   PetscInt       bs = newMat->rmap->bs;
2857 
2858   PetscFunctionBegin;
2859   /* force binary viewer to load .info file if it has not yet done so */
2860   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2861   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2862   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2863   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2864   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2865   if (!rank) {
2866     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2867     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2868   }
2869 
2870   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
2871   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2872   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2873   if (bs < 0) bs = 1;
2874 
2875   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2876   M    = header[1]; N = header[2];
2877 
2878   /* If global sizes are set, check if they are consistent with that given in the file */
2879   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2880   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2881 
2882   /* determine ownership of all (block) rows */
2883   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
2884   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2885   else m = newMat->rmap->n; /* Set by user */
2886 
2887   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2888   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2889 
2890   /* First process needs enough room for process with most rows */
2891   if (!rank) {
2892     mmax = rowners[1];
2893     for (i=2; i<=size; i++) {
2894       mmax = PetscMax(mmax, rowners[i]);
2895     }
2896   } else mmax = -1;             /* unused, but compilers complain */
2897 
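  /* convert the gathered per-process row counts in rowners[] into global row ownership offsets */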
2898   rowners[0] = 0;
2899   for (i=2; i<=size; i++) {
2900     rowners[i] += rowners[i-1];
2901   }
2902   rstart = rowners[rank];
2903   rend   = rowners[rank+1];
2904 
2905   /* distribute row lengths to all processors */
2906   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2907   if (!rank) {
2908     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2909     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2910     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2911     for (j=0; j<m; j++) {
2912       procsnz[0] += ourlens[j];
2913     }
2914     for (i=1; i<size; i++) {
2915       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2916       /* calculate the number of nonzeros on each processor */
2917       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2918         procsnz[i] += rowlengths[j];
2919       }
2920       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2921     }
2922     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2923   } else {
2924     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2925   }
2926 
2927   if (!rank) {
2928     /* determine max buffer needed and allocate it */
2929     maxnz = 0;
2930     for (i=0; i<size; i++) {
2931       maxnz = PetscMax(maxnz,procsnz[i]);
2932     }
2933     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2934 
2935     /* read in my part of the matrix column indices  */
2936     nz   = procsnz[0];
2937     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2938     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2939 
2940     /* read in everyone else's column indices and ship them off */
2941     for (i=1; i<size; i++) {
2942       nz   = procsnz[i];
2943       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2944       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2945     }
2946     ierr = PetscFree(cols);CHKERRQ(ierr);
2947   } else {
2948     /* determine buffer space needed for message */
2949     nz = 0;
2950     for (i=0; i<m; i++) {
2951       nz += ourlens[i];
2952     }
2953     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2954 
2955     /* receive message of column indices*/
2956     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2957   }
2958 
2959   /* determine column ownership if matrix is not square */
2960   if (N != M) {
2961     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2962     else n = newMat->cmap->n;
2963     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2964     cstart = cend - n;
2965   } else {
2966     cstart = rstart;
2967     cend   = rend;
2968     n      = cend - cstart;
2969   }
2970 
2971   /* loop over local rows, determining number of off diagonal entries */
2972   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2973   jj   = 0;
2974   for (i=0; i<m; i++) {
2975     for (j=0; j<ourlens[i]; j++) {
2976       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2977       jj++;
2978     }
2979   }
2980 
2981   for (i=0; i<m; i++) {
2982     ourlens[i] -= offlens[i];
2983   }
2984   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2985 
2986   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2987 
2988   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2989 
2990   for (i=0; i<m; i++) {
2991     ourlens[i] += offlens[i];
2992   }
2993 
2994   if (!rank) {
2995     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2996 
2997     /* read in my part of the matrix numerical values  */
2998     nz   = procsnz[0];
2999     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3000 
3001     /* insert into matrix */
3002     jj      = rstart;
3003     smycols = mycols;
3004     svals   = vals;
3005     for (i=0; i<m; i++) {
3006       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3007       smycols += ourlens[i];
3008       svals   += ourlens[i];
3009       jj++;
3010     }
3011 
3012     /* read in other processors and ship out */
3013     for (i=1; i<size; i++) {
3014       nz   = procsnz[i];
3015       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3016       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3017     }
3018     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3019   } else {
3020     /* receive numeric values */
3021     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3022 
3023     /* receive message of values*/
3024     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3025 
3026     /* insert into matrix */
3027     jj      = rstart;
3028     smycols = mycols;
3029     svals   = vals;
3030     for (i=0; i<m; i++) {
3031       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3032       smycols += ourlens[i];
3033       svals   += ourlens[i];
3034       jj++;
3035     }
3036   }
3037   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3038   ierr = PetscFree(vals);CHKERRQ(ierr);
3039   ierr = PetscFree(mycols);CHKERRQ(ierr);
3040   ierr = PetscFree(rowners);CHKERRQ(ierr);
3041   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3042   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3043   PetscFunctionReturn(0);
3044 }
3045 
3046 #undef __FUNCT__
3047 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3048 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */
3049 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3050 {
3051   PetscErrorCode ierr;
3052   IS             iscol_local;
3053   PetscInt       csize;
3054 
3055   PetscFunctionBegin;
3056   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3057   if (call == MAT_REUSE_MATRIX) {
3058     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3059     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3060   } else {
3061     /* check if we are grabbing all columns*/
3062     PetscBool    isstride;
3063     PetscMPIInt  lisstride = 0,gisstride;
3064     ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3065     if (isstride) {
3066       PetscInt  start,len,mstart,mlen;
3067       ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3068       ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3069       ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3070       if (mstart == start && mlen-mstart == len) lisstride = 1;
3071     }
3072     ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3073     if (gisstride) {
3074       PetscInt N;
3075       ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3076       ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3077       ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3078       ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3079     } else {
3080       PetscInt cbs;
3081       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3082       ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3083       ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3084     }
3085   }
3086   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3087   if (call == MAT_INITIAL_MATRIX) {
3088     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3089     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3090   }
3091   PetscFunctionReturn(0);
3092 }
3093 
3094 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3095 #undef __FUNCT__
3096 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3097 /*
3098     Not great since it makes two copies of the submatrix: first a SeqAIJ
3099   locally, and then the end result by concatenating the local matrices.
3100   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3101 
3102   Note: This requires a sequential iscol with all indices.
3103 */
3104 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3105 {
3106   PetscErrorCode ierr;
3107   PetscMPIInt    rank,size;
3108   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3109   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3110   PetscBool      allcolumns, colflag;
3111   Mat            M,Mreuse;
3112   MatScalar      *vwork,*aa;
3113   MPI_Comm       comm;
3114   Mat_SeqAIJ     *aij;
3115 
3116   PetscFunctionBegin;
3117   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3118   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3119   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3120 
3121   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3122   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3123   if (colflag && ncol == mat->cmap->N) {
3124     allcolumns = PETSC_TRUE;
3125     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr);
3126   } else {
3127     allcolumns = PETSC_FALSE;
3128   }
3129   if (call ==  MAT_REUSE_MATRIX) {
3130     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3131     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3132     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3133   } else {
3134     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3135   }
3136 
3137   /*
3138       m - number of local rows
3139       n - number of columns (same on all processors)
3140       rstart - first row in new global matrix generated
3141   */
3142   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3143   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3144   if (call == MAT_INITIAL_MATRIX) {
3145     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3146     ii  = aij->i;
3147     jj  = aij->j;
3148 
3149     /*
3150         Determine the number of non-zeros in the diagonal and off-diagonal
3151         portions of the matrix in order to do correct preallocation
3152     */
3153 
3154     /* first get start and end of "diagonal" columns */
3155     if (csize == PETSC_DECIDE) {
3156       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3157       if (mglobal == n) { /* square matrix */
3158         nlocal = m;
3159       } else {
3160         nlocal = n/size + ((n % size) > rank);
3161       }
3162     } else {
3163       nlocal = csize;
3164     }
3165     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3166     rstart = rend - nlocal;
3167     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3168 
3169     /* next, compute all the lengths */
3170     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3171     olens = dlens + m;
3172     for (i=0; i<m; i++) {
3173       jend = ii[i+1] - ii[i];
3174       olen = 0;
3175       dlen = 0;
3176       for (j=0; j<jend; j++) {
3177         if (*jj < rstart || *jj >= rend) olen++;
3178         else dlen++;
3179         jj++;
3180       }
3181       olens[i] = olen;
3182       dlens[i] = dlen;
3183     }
3184     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3185     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3186     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3187     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3188     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3189     ierr = PetscFree(dlens);CHKERRQ(ierr);
3190   } else {
3191     PetscInt ml,nl;
3192 
3193     M    = *newmat;
3194     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3195     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3196     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3197     /*
3198          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3199        rather than the slower MatSetValues().
3200     */
3201     M->was_assembled = PETSC_TRUE;
3202     M->assembled     = PETSC_FALSE;
3203   }
3204   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3205   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3206   ii   = aij->i;
3207   jj   = aij->j;
3208   aa   = aij->a;
3209   for (i=0; i<m; i++) {
3210     row   = rstart + i;
3211     nz    = ii[i+1] - ii[i];
3212     cwork = jj;     jj += nz;
3213     vwork = aa;     aa += nz;
3214     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3215   }
3216 
3217   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3218   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3219   *newmat = M;
3220 
3221   /* save submatrix used in processor for next request */
3222   if (call ==  MAT_INITIAL_MATRIX) {
3223     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3224     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3225   }
3226   PetscFunctionReturn(0);
3227 }
3228 
3229 #undef __FUNCT__
3230 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3231 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3232 {
3233   PetscInt       m,cstart, cend,j,nnz,i,d;
3234   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3235   const PetscInt *JJ;
3236   PetscScalar    *values;
3237   PetscErrorCode ierr;
3238 
3239   PetscFunctionBegin;
3240   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3241 
3242   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3243   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3244   m      = B->rmap->n;
3245   cstart = B->cmap->rstart;
3246   cend   = B->cmap->rend;
3247   rstart = B->rmap->rstart;
3248 
3249   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3250 
3251 #if defined(PETSC_USE_DEBUG)
3252   for (i=0; i<m; i++) {
3253     nnz = Ii[i+1]- Ii[i];
3254     JJ  = J + Ii[i];
3255     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3256     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3257     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3258   }
3259 #endif
3260 
3261   for (i=0; i<m; i++) {
3262     nnz     = Ii[i+1]- Ii[i];
3263     JJ      = J + Ii[i];
3264     nnz_max = PetscMax(nnz_max,nnz);
3265     d       = 0;
3266     for (j=0; j<nnz; j++) {
3267       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3268     }
3269     d_nnz[i] = d;
3270     o_nnz[i] = nnz - d;
3271   }
3272   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3273   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3274 
3275   if (v) values = (PetscScalar*)v;
3276   else {
3277     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3278   }
3279 
3280   for (i=0; i<m; i++) {
3281     ii   = i + rstart;
3282     nnz  = Ii[i+1]- Ii[i];
3283     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3284   }
3285   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3286   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3287 
3288   if (!v) {
3289     ierr = PetscFree(values);CHKERRQ(ierr);
3290   }
3291   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3292   PetscFunctionReturn(0);
3293 }
3294 
3295 #undef __FUNCT__
3296 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3297 /*@
3298    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3299    (the default parallel PETSc format).
3300 
3301    Collective on MPI_Comm
3302 
3303    Input Parameters:
3304 +  B - the matrix
3305 .  i - the indices into j for the start of each local row (starts with zero)
3306 .  j - the column indices for each local row (starts with zero)
3307 -  v - optional values in the matrix
3308 
3309    Level: developer
3310 
3311    Notes:
3312        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3313      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3314      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3315 
3316        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3317 
3318        The format used for the sparse matrix input is equivalent to a
3319     row-major ordering, i.e., for the following matrix, the input data expected is
3320     as shown:
3321 
3322 $        1 0 0
3323 $        2 0 3     P0
3324 $       -------
3325 $        4 5 6     P1
3326 $
3327 $     Process0 [P0]: rows_owned=[0,1]
3328 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3329 $        j =  {0,0,2}  [size = 3]
3330 $        v =  {1,2,3}  [size = 3]
3331 $
3332 $     Process1 [P1]: rows_owned=[2]
3333 $        i =  {0,3}    [size = nrow+1  = 1+1]
3334 $        j =  {0,1,2}  [size = 3]
3335 $        v =  {4,5,6}  [size = 3]
3336 
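       As an illustrative sketch (the variable names are hypothetical, taken from the P0 example above), process 0 could build its part with
.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
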
3337 .keywords: matrix, aij, compressed row, sparse, parallel
3338 
3339 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3340           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3341 @*/
3342 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3343 {
3344   PetscErrorCode ierr;
3345 
3346   PetscFunctionBegin;
3347   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3348   PetscFunctionReturn(0);
3349 }
3350 
3351 #undef __FUNCT__
3352 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3353 /*@C
3354    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3355    (the default parallel PETSc format).  For good matrix assembly performance
3356    the user should preallocate the matrix storage by setting the parameters
3357    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3358    performance can be increased by more than a factor of 50.
3359 
3360    Collective on MPI_Comm
3361 
3362    Input Parameters:
3363 +  B - the matrix
3364 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3365            (same value is used for all local rows)
3366 .  d_nnz - array containing the number of nonzeros in the various rows of the
3367            DIAGONAL portion of the local submatrix (possibly different for each row)
3368            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3369            The size of this array is equal to the number of local rows, i.e 'm'.
3370            For matrices that will be factored, you must leave room for (and set)
3371            the diagonal entry even if it is zero.
3372 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3373            submatrix (same value is used for all local rows).
3374 -  o_nnz - array containing the number of nonzeros in the various rows of the
3375            OFF-DIAGONAL portion of the local submatrix (possibly different for
3376            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3377            structure. The size of this array is equal to the number
3378            of local rows, i.e 'm'.
3379 
3380    If the *_nnz parameter is given then the *_nz parameter is ignored
3381 
3382    The AIJ format (also called the Yale sparse matrix format or
3383    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3384    storage.  The stored row and column indices begin with zero.
3385    See Users-Manual: ch_mat for details.
3386 
3387    The parallel matrix is partitioned such that the first m0 rows belong to
3388    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3389    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3390 
3391    The DIAGONAL portion of the local submatrix of a processor can be defined
3392    as the submatrix obtained by extracting the part corresponding to
3393    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3394    first row that belongs to the processor, r2 is the last row belonging to
3395    this processor, and c1-c2 is the range of indices of the local part of a
3396    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3397    common case of a square matrix, the row and column ranges are the same and
3398    the DIAGONAL part is also square. The remaining portion of the local
3399    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3400 
3401    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3402 
3403    You can call MatGetInfo() to get information on how effective the preallocation was;
3404    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3405    You can also run with the option -info and look for messages with the string
3406    malloc in them to see if additional memory allocation was needed.
3407 
3408    Example usage:
3409 
3410    Consider the following 8x8 matrix with 34 non-zero values, that is
3411    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3412    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3413    as follows:
3414 
3415 .vb
3416             1  2  0  |  0  3  0  |  0  4
3417     Proc0   0  5  6  |  7  0  0  |  8  0
3418             9  0 10  | 11  0  0  | 12  0
3419     -------------------------------------
3420            13  0 14  | 15 16 17  |  0  0
3421     Proc1   0 18  0  | 19 20 21  |  0  0
3422             0  0  0  | 22 23  0  | 24  0
3423     -------------------------------------
3424     Proc2  25 26 27  |  0  0 28  | 29  0
3425            30  0  0  | 31 32 33  |  0 34
3426 .ve
3427 
3428    This can be represented as a collection of submatrices as:
3429 
3430 .vb
3431       A B C
3432       D E F
3433       G H I
3434 .ve
3435 
3436    Where the submatrices A,B,C are owned by proc0, D,E,F are
3437    owned by proc1, G,H,I are owned by proc2.
3438 
3439    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3440    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3441    The 'M','N' parameters are 8,8, and have the same values on all procs.
3442 
3443    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3444    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3445    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3446    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3447    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3448    matrix, and [DF] as another SeqAIJ matrix.
3449 
3450    When d_nz, o_nz parameters are specified, d_nz storage elements are
3451    allocated for every row of the local diagonal submatrix, and o_nz
3452    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3453    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3454    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3455    In this case, the values of d_nz,o_nz are:
3456 .vb
3457      proc0 : dnz = 2, o_nz = 2
3458      proc1 : dnz = 3, o_nz = 2
3459      proc2 : dnz = 1, o_nz = 4
3460 .ve
3461    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3462    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3463    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3464    34 values.
3465 
3466    When d_nnz, o_nnz parameters are specified, the storage is specified
3467    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3468    In the above case the values for d_nnz,o_nnz are:
3469 .vb
3470      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3471      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3472      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3473 .ve
3474    Here the space allocated is the sum of all the above values, i.e., 34, and
3475    hence pre-allocation is perfect.
3476 
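   For the matrix above, the per-row preallocation on proc0 could be set with the following sketch (the variable names are illustrative):
.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
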
3477    Level: intermediate
3478 
3479 .keywords: matrix, aij, compressed row, sparse, parallel
3480 
3481 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3482           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3483 @*/
3484 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3485 {
3486   PetscErrorCode ierr;
3487 
3488   PetscFunctionBegin;
3489   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3490   PetscValidType(B,1);
3491   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3492   PetscFunctionReturn(0);
3493 }
3494 
3495 #undef __FUNCT__
3496 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3497 /*@
3498      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3499          CSR format the local rows.
3500 
3501    Collective on MPI_Comm
3502 
3503    Input Parameters:
3504 +  comm - MPI communicator
3505 .  m - number of local rows (Cannot be PETSC_DECIDE)
3506 .  n - This value should be the same as the local size used in creating the
3507        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3508        calculated if N is given) For square matrices n is almost always m.
3509 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3510 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3511 .   i - row indices
3512 .   j - column indices
3513 -   a - matrix values
3514 
3515    Output Parameter:
3516 .   mat - the matrix
3517 
3518    Level: intermediate
3519 
3520    Notes:
3521        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3522      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3523      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3524 
3525        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3526 
3527        The format used for the sparse matrix input is equivalent to a
3528     row-major ordering, i.e., for the following matrix, the input data expected is
3529     as shown:
3530 
3531 $        1 0 0
3532 $        2 0 3     P0
3533 $       -------
3534 $        4 5 6     P1
3535 $
3536 $     Process0 [P0]: rows_owned=[0,1]
3537 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3538 $        j =  {0,0,2}  [size = 3]
3539 $        v =  {1,2,3}  [size = 3]
3540 $
3541 $     Process1 [P1]: rows_owned=[2]
3542 $        i =  {0,3}    [size = nrow+1  = 1+1]
3543 $        j =  {0,1,2}  [size = 3]
3544 $        v =  {4,5,6}  [size = 3]
3545 
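       For instance, with the P0 arrays above (a sketch; the variable names are illustrative), process 0 would call
.vb
     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
.ve
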
3546 .keywords: matrix, aij, compressed row, sparse, parallel
3547 
3548 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3549           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3550 @*/
3551 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3552 {
3553   PetscErrorCode ierr;
3554 
3555   PetscFunctionBegin;
3556   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3557   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3558   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3559   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3560   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3561   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3562   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3563   PetscFunctionReturn(0);
3564 }
3565 
3566 #undef __FUNCT__
3567 #define __FUNCT__ "MatCreateAIJ"
3568 /*@C
3569    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3570    (the default parallel PETSc format).  For good matrix assembly performance
3571    the user should preallocate the matrix storage by setting the parameters
3572    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3573    performance can be increased by more than a factor of 50.
3574 
3575    Collective on MPI_Comm
3576 
3577    Input Parameters:
3578 +  comm - MPI communicator
3579 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3580            This value should be the same as the local size used in creating the
3581            y vector for the matrix-vector product y = Ax.
3582 .  n - This value should be the same as the local size used in creating the
3583        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3584        calculated if N is given) For square matrices n is almost always m.
3585 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3586 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3587 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3588            (same value is used for all local rows)
3589 .  d_nnz - array containing the number of nonzeros in the various rows of the
3590            DIAGONAL portion of the local submatrix (possibly different for each row)
3591            or NULL, if d_nz is used to specify the nonzero structure.
3592            The size of this array is equal to the number of local rows, i.e 'm'.
3593 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3594            submatrix (same value is used for all local rows).
3595 -  o_nnz - array containing the number of nonzeros in the various rows of the
3596            OFF-DIAGONAL portion of the local submatrix (possibly different for
3597            each row) or NULL, if o_nz is used to specify the nonzero
3598            structure. The size of this array is equal to the number
3599            of local rows, i.e 'm'.
3600 
3601    Output Parameter:
3602 .  A - the matrix
3603 
3604    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3605    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3606    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3607 
3608    Notes:
3609    If the *_nnz parameter is given then the *_nz parameter is ignored
3610 
3611    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3612    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3613    storage requirements for this matrix.
3614 
3615    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3616    processor then it must be used on all processors that share the object for
3617    that argument.
3618 
3619    The user MUST specify either the local or global matrix dimensions
3620    (possibly both).
3621 
3622    The parallel matrix is partitioned across processors such that the
3623    first m0 rows belong to process 0, the next m1 rows belong to
3624    process 1, the next m2 rows belong to process 2, etc., where
3625    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
3626    values corresponding to an [m x N] submatrix.
3627 
3628    The columns are logically partitioned with the n0 columns belonging
3629    to 0th partition, the next n1 columns belonging to the next
3630    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
3631 
3632    The DIAGONAL portion of the local submatrix on any given processor
3633    is the submatrix corresponding to the rows and columns m,n
3634    corresponding to the given processor, i.e., the diagonal matrix on
3635    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3636    etc. The remaining portion of the local submatrix [m x (N-n)]
3637    constitute the OFF-DIAGONAL portion. The example below better
3638    illustrates this concept.
3639 
3640    For a square global matrix we define each processor's diagonal portion
3641    to be its local rows and the corresponding columns (a square submatrix);
3642    each processor's off-diagonal portion encompasses the remainder of the
3643    local matrix (a rectangular submatrix).
3644 
3645    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3646 
3647    When calling this routine with a single process communicator, a matrix of
3648    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3649    type of communicator, use the construction mechanism:
3650      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3651 
3652    By default, this format uses inodes (identical nodes) when possible.
3653    We search for consecutive rows with the same nonzero structure, thereby
3654    reusing matrix information to achieve increased efficiency.
3655 
3656    Options Database Keys:
3657 +  -mat_no_inode  - Do not use inodes
3658 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3659 -  -mat_aij_oneindex - Internally use indexing starting at 1
3660         rather than 0.  Note that when calling MatSetValues(),
3661         the user still MUST index entries starting at 0!
3662 
3663 
3664    Example usage:
3665 
3666    Consider the following 8x8 matrix with 34 non-zero values, that is
3667    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3668    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3669    as follows:
3670 
3671 .vb
3672             1  2  0  |  0  3  0  |  0  4
3673     Proc0   0  5  6  |  7  0  0  |  8  0
3674             9  0 10  | 11  0  0  | 12  0
3675     -------------------------------------
3676            13  0 14  | 15 16 17  |  0  0
3677     Proc1   0 18  0  | 19 20 21  |  0  0
3678             0  0  0  | 22 23  0  | 24  0
3679     -------------------------------------
3680     Proc2  25 26 27  |  0  0 28  | 29  0
3681            30  0  0  | 31 32 33  |  0 34
3682 .ve
3683 
3684    This can be represented as a collection of submatrices as:
3685 
3686 .vb
3687       A B C
3688       D E F
3689       G H I
3690 .ve
3691 
3692    Where the submatrices A,B,C are owned by proc0, D,E,F are
3693    owned by proc1, G,H,I are owned by proc2.
3694 
3695    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3696    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3697    The 'M','N' parameters are 8,8, and have the same values on all procs.
3698 
3699    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3700    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3701    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3702    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3703    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3704    matrix, and [DF] as another SeqAIJ matrix.
3705 
3706    When d_nz, o_nz parameters are specified, d_nz storage elements are
3707    allocated for every row of the local diagonal submatrix, and o_nz
3708    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3709    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3710    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3711    In this case, the values of d_nz,o_nz are:
3712 .vb
3713      proc0 : dnz = 2, o_nz = 2
3714      proc1 : dnz = 3, o_nz = 2
3715      proc2 : dnz = 1, o_nz = 4
3716 .ve
3717    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3718    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3719    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3720    34 values.
3721 
3722    When d_nnz, o_nnz parameters are specified, the storage is specified
3723    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3724    In the above case the values for d_nnz,o_nnz are:
3725 .vb
3726      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3727      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3728      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3729 .ve
3730    Here the space allocated is the sum of all the above values, i.e., 34, and
3731    hence pre-allocation is perfect.
3732 
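   On proc0 of the example above, the corresponding call is sketched below (with d_nnz and o_nnz set as shown; the variable names are illustrative):
.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
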
3733    Level: intermediate
3734 
3735 .keywords: matrix, aij, compressed row, sparse, parallel
3736 
3737 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3738           MPIAIJ, MatCreateMPIAIJWithArrays()
3739 @*/
3740 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3741 {
3742   PetscErrorCode ierr;
3743   PetscMPIInt    size;
3744 
3745   PetscFunctionBegin;
3746   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3747   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3748   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3749   if (size > 1) {
3750     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3751     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3752   } else {
3753     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3754     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3755   }
3756   PetscFunctionReturn(0);
3757 }
3758 
3759 #undef __FUNCT__
3760 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3761 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3762 {
3763   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3764 
3765   PetscFunctionBegin;
3766   if (Ad)     *Ad     = a->A;
3767   if (Ao)     *Ao     = a->B;
3768   if (colmap) *colmap = a->garray;
3769   PetscFunctionReturn(0);
3770 }
3771 
3772 #undef __FUNCT__
3773 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3774 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3775 {
3776   PetscErrorCode ierr;
3777   PetscInt       i;
3778   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3779 
3780   PetscFunctionBegin;
3781   if (coloring->ctype == IS_COLORING_GLOBAL) {
3782     ISColoringValue *allcolors,*colors;
3783     ISColoring      ocoloring;
3784 
3785     /* set coloring for diagonal portion */
3786     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3787 
3788     /* set coloring for off-diagonal portion */
3789     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
3790     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3791     for (i=0; i<a->B->cmap->n; i++) {
3792       colors[i] = allcolors[a->garray[i]];
3793     }
3794     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3795     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3796     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3797     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3798   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3799     ISColoringValue *colors;
3800     PetscInt        *larray;
3801     ISColoring      ocoloring;
3802 
3803     /* set coloring for diagonal portion */
3804     ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr);
3805     for (i=0; i<a->A->cmap->n; i++) {
3806       larray[i] = i + A->cmap->rstart;
3807     }
3808     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
3809     ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr);
3810     for (i=0; i<a->A->cmap->n; i++) {
3811       colors[i] = coloring->colors[larray[i]];
3812     }
3813     ierr = PetscFree(larray);CHKERRQ(ierr);
3814     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3815     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3816     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3817 
3818     /* set coloring for off-diagonal portion */
3819     ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr);
3820     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
3821     ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr);
3822     for (i=0; i<a->B->cmap->n; i++) {
3823       colors[i] = coloring->colors[larray[i]];
3824     }
3825     ierr = PetscFree(larray);CHKERRQ(ierr);
3826     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr);
3827     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3828     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
3829   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3830   PetscFunctionReturn(0);
3831 }
3832 
3833 #undef __FUNCT__
3834 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3835 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3836 {
3837   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3838   PetscErrorCode ierr;
3839 
3840   PetscFunctionBegin;
3841   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3842   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3843   PetscFunctionReturn(0);
3844 }
3845 
3846 #undef __FUNCT__
3847 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ"
3848 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3849 {
3850   PetscErrorCode ierr;
3851   PetscInt       m,N,i,rstart,nnz,Ii;
3852   PetscInt       *indx;
3853   PetscScalar    *values;
3854 
3855   PetscFunctionBegin;
3856   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3857   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3858     PetscInt       *dnz,*onz,sum,bs,cbs;
3859 
3860     if (n == PETSC_DECIDE) {
3861       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3862     }
3863     /* Check sum(n) = N */
3864     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3865     if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
3866 
3867     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3868     rstart -= m;
3869 
3870     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3871     for (i=0; i<m; i++) {
3872       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3873       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3874       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
3875     }
3876 
3877     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3878     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3879     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3880     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3881     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3882     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3883     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3884   }
3885 
3886   /* numeric phase */
3887   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3888   for (i=0; i<m; i++) {
3889     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3890     Ii   = i + rstart;
3891     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3892     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3893   }
3894   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3895   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3896   PetscFunctionReturn(0);
3897 }
3898 
3899 #undef __FUNCT__
3900 #define __FUNCT__ "MatFileSplit"
3901 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3902 {
3903   PetscErrorCode    ierr;
3904   PetscMPIInt       rank;
3905   PetscInt          m,N,i,rstart,nnz;
3906   size_t            len;
3907   const PetscInt    *indx;
3908   PetscViewer       out;
3909   char              *name;
3910   Mat               B;
3911   const PetscScalar *values;
3912 
3913   PetscFunctionBegin;
3914   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3915   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3916   /* Should this be the type of the diagonal block of A? */
3917   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3918   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3919   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
3920   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3921   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3922   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3923   for (i=0; i<m; i++) {
3924     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3925     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3926     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3927   }
3928   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3929   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3930 
3931   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3932   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3933   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
3934   sprintf(name,"%s.%d",outfile,rank);
3935   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3936   ierr = PetscFree(name);CHKERRQ(ierr);
3937   ierr = MatView(B,out);CHKERRQ(ierr);
3938   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
3939   ierr = MatDestroy(&B);CHKERRQ(ierr);
3940   PetscFunctionReturn(0);
3941 }
3942 
3943 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
3944 #undef __FUNCT__
3945 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3946 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3947 {
3948   PetscErrorCode      ierr;
3949   Mat_Merge_SeqsToMPI *merge;
3950   PetscContainer      container;
3951 
3952   PetscFunctionBegin;
3953   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
3954   if (container) {
3955     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
3956     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3957     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3958     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3959     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3960     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3961     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3962     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3963     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3964     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3965     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3966     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3967     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3968     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
3969     ierr = PetscFree(merge);CHKERRQ(ierr);
3970     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3971   }
3972   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3973   PetscFunctionReturn(0);
3974 }
3975 
3976 #include <../src/mat/utils/freespace.h>
3977 #include <petscbt.h>
3978 
3979 #undef __FUNCT__
3980 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
3981 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
3982 {
3983   PetscErrorCode      ierr;
3984   MPI_Comm            comm;
3985   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
3986   PetscMPIInt         size,rank,taga,*len_s;
3987   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
3988   PetscInt            proc,m;
3989   PetscInt            **buf_ri,**buf_rj;
3990   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3991   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
3992   MPI_Request         *s_waits,*r_waits;
3993   MPI_Status          *status;
3994   MatScalar           *aa=a->a;
3995   MatScalar           **abuf_r,*ba_i;
3996   Mat_Merge_SeqsToMPI *merge;
3997   PetscContainer      container;
3998 
3999   PetscFunctionBegin;
4000   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4001   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4002 
4003   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4004   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4005 
4006   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4007   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4008 
4009   bi     = merge->bi;
4010   bj     = merge->bj;
4011   buf_ri = merge->buf_ri;
4012   buf_rj = merge->buf_rj;
4013 
4014   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4015   owners = merge->rowmap->range;
4016   len_s  = merge->len_s;
4017 
4018   /* send and recv matrix values */
4019   /*-----------------------------*/
4020   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4021   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4022 
4023   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4024   for (proc=0,k=0; proc<size; proc++) {
4025     if (!len_s[proc]) continue;
4026     i    = owners[proc];
4027     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4028     k++;
4029   }
4030 
4031   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4032   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4033   ierr = PetscFree(status);CHKERRQ(ierr);
4034 
4035   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4036   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4037 
4038   /* insert mat values of mpimat */
4039   /*----------------------------*/
4040   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4041   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4042 
4043   for (k=0; k<merge->nrecv; k++) {
4044     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4045     nrows       = *(buf_ri_k[k]);
4046     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of the k-th received i-structure */
4047     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4048   }
4049 
4050   /* set values of ba */
4051   m = merge->rowmap->n;
4052   for (i=0; i<m; i++) {
4053     arow = owners[rank] + i;
4054     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4055     bnzi = bi[i+1] - bi[i];
4056     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4057 
4058     /* add local non-zero vals of this proc's seqmat into ba */
4059     anzi   = ai[arow+1] - ai[arow];
4060     aj     = a->j + ai[arow];
4061     aa     = a->a + ai[arow];
4062     nextaj = 0;
4063     for (j=0; nextaj<anzi; j++) {
4064       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4065         ba_i[j] += aa[nextaj++];
4066       }
4067     }
4068 
4069     /* add received vals into ba */
4070     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4071       /* i-th row */
4072       if (i == *nextrow[k]) {
4073         anzi   = *(nextai[k]+1) - *nextai[k];
4074         aj     = buf_rj[k] + *(nextai[k]);
4075         aa     = abuf_r[k] + *(nextai[k]);
4076         nextaj = 0;
4077         for (j=0; nextaj<anzi; j++) {
4078           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4079             ba_i[j] += aa[nextaj++];
4080           }
4081         }
4082         nextrow[k]++; nextai[k]++;
4083       }
4084     }
4085     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4086   }
4087   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4088   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4089 
4090   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4091   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4092   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4093   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4094   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4095   PetscFunctionReturn(0);
4096 }
4097 
4098 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4099 
4100 #undef __FUNCT__
4101 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4102 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4103 {
4104   PetscErrorCode      ierr;
4105   Mat                 B_mpi;
4106   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4107   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4108   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4109   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4110   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4111   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4112   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4113   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4114   MPI_Status          *status;
4115   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4116   PetscBT             lnkbt;
4117   Mat_Merge_SeqsToMPI *merge;
4118   PetscContainer      container;
4119 
4120   PetscFunctionBegin;
4121   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4122 
4123   /* make sure it is a PETSc comm */
4124   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4125   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4126   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4127 
4128   ierr = PetscNew(&merge);CHKERRQ(ierr);
4129   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4130 
4131   /* determine row ownership */
4132   /*---------------------------------------------------------*/
4133   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4134   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4135   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4136   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4137   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4138   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4139   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4140 
4141   m      = merge->rowmap->n;
4142   owners = merge->rowmap->range;
4143 
4144   /* determine the number of messages to send, their lengths */
4145   /*---------------------------------------------------------*/
4146   len_s = merge->len_s;
4147 
4148   len          = 0; /* length of buf_si[] */
4149   merge->nsend = 0;
4150   for (proc=0; proc<size; proc++) {
4151     len_si[proc] = 0;
4152     if (proc == rank) {
4153       len_s[proc] = 0;
4154     } else {
4155       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4156       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros (j entries) to be sent to [proc] */
4157     }
4158     if (len_s[proc]) {
4159       merge->nsend++;
4160       nrows = 0;
4161       for (i=owners[proc]; i<owners[proc+1]; i++) {
4162         if (ai[i+1] > ai[i]) nrows++;
4163       }
4164       len_si[proc] = 2*(nrows+1);
4165       len         += len_si[proc];
4166     }
4167   }
4168 
4169   /* determine the number and length of messages to receive for ij-structure */
4170   /*-------------------------------------------------------------------------*/
4171   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4172   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4173 
4174   /* post the Irecv of j-structure */
4175   /*-------------------------------*/
4176   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4177   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4178 
4179   /* post the Isend of j-structure */
4180   /*--------------------------------*/
4181   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4182 
4183   for (proc=0, k=0; proc<size; proc++) {
4184     if (!len_s[proc]) continue;
4185     i    = owners[proc];
4186     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4187     k++;
4188   }
4189 
4190   /* receives and sends of j-structure are complete */
4191   /*------------------------------------------------*/
4192   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4193   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4194 
4195   /* send and recv i-structure */
4196   /*---------------------------*/
4197   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4198   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4199 
4200   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4201   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4202   for (proc=0,k=0; proc<size; proc++) {
4203     if (!len_s[proc]) continue;
4204     /* form outgoing message for i-structure:
4205          buf_si[0]:                 nrows to be sent
4206                [1:nrows]:           row index (local to the receiving process)
4207                [nrows+1:2*nrows+1]: i-structure index
4208     */
4209     /*-------------------------------------------*/
4210     nrows       = len_si[proc]/2 - 1;
4211     buf_si_i    = buf_si + nrows+1;
4212     buf_si[0]   = nrows;
4213     buf_si_i[0] = 0;
4214     nrows       = 0;
4215     for (i=owners[proc]; i<owners[proc+1]; i++) {
4216       anzi = ai[i+1] - ai[i];
4217       if (anzi) {
4218         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4219         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4220         nrows++;
4221       }
4222     }
4223     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4224     k++;
4225     buf_si += len_si[proc];
4226   }
4227 
4228   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4229   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4230 
4231   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4232   for (i=0; i<merge->nrecv; i++) {
4233     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4234   }
4235 
4236   ierr = PetscFree(len_si);CHKERRQ(ierr);
4237   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4238   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4239   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4240   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4241   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4242   ierr = PetscFree(status);CHKERRQ(ierr);
4243 
4244   /* compute a local seq matrix in each processor */
4245   /*----------------------------------------------*/
4246   /* allocate bi array and free space for accumulating nonzero column info */
4247   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4248   bi[0] = 0;
4249 
4250   /* create and initialize a linked list */
4251   nlnk = N+1;
4252   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4253 
4254   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4255   len  = ai[owners[rank+1]] - ai[owners[rank]];
4256   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4257 
4258   current_space = free_space;
4259 
4260   /* determine symbolic info for each local row */
4261   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4262 
4263   for (k=0; k<merge->nrecv; k++) {
4264     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4265     nrows       = *buf_ri_k[k];
4266     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of the k-th received i-structure */
4267     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4268   }
4269 
4270   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4271   len  = 0;
4272   for (i=0; i<m; i++) {
4273     bnzi = 0;
4274     /* add local non-zero cols of this proc's seqmat into lnk */
4275     arow  = owners[rank] + i;
4276     anzi  = ai[arow+1] - ai[arow];
4277     aj    = a->j + ai[arow];
4278     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4279     bnzi += nlnk;
4280     /* add received col data into lnk */
4281     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4282       if (i == *nextrow[k]) { /* i-th row */
4283         anzi  = *(nextai[k]+1) - *nextai[k];
4284         aj    = buf_rj[k] + *nextai[k];
4285         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4286         bnzi += nlnk;
4287         nextrow[k]++; nextai[k]++;
4288       }
4289     }
4290     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4291 
4292     /* if free space is not available, make more free space */
4293     if (current_space->local_remaining<bnzi) {
4294       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4295       nspacedouble++;
4296     }
4297     /* copy data into free space, then initialize lnk */
4298     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4299     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4300 
4301     current_space->array           += bnzi;
4302     current_space->local_used      += bnzi;
4303     current_space->local_remaining -= bnzi;
4304 
4305     bi[i+1] = bi[i] + bnzi;
4306   }
4307 
4308   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4309 
4310   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4311   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4312   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4313 
4314   /* create symbolic parallel matrix B_mpi */
4315   /*---------------------------------------*/
4316   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4317   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4318   if (n==PETSC_DECIDE) {
4319     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4320   } else {
4321     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4322   }
4323   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4324   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4325   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4326   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4327   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4328 
4329   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4330   B_mpi->assembled    = PETSC_FALSE;
4331   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4332   merge->bi           = bi;
4333   merge->bj           = bj;
4334   merge->buf_ri       = buf_ri;
4335   merge->buf_rj       = buf_rj;
4336   merge->coi          = NULL;
4337   merge->coj          = NULL;
4338   merge->owners_co    = NULL;
4339 
4340   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4341 
4342   /* attach the supporting struct to B_mpi for reuse */
4343   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4344   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4345   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4346   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4347   *mpimat = B_mpi;
4348 
4349   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4350   PetscFunctionReturn(0);
4351 }
4352 
4353 #undef __FUNCT__
4354 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4355 /*@C
4356       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
4357                  matrices from each processor
4358 
4359     Collective on MPI_Comm
4360 
4361    Input Parameters:
4362 +    comm - the communicator the parallel matrix will live on
4363 .    seqmat - the input sequential matrix
4364 .    m - number of local rows (or PETSC_DECIDE)
4365 .    n - number of local columns (or PETSC_DECIDE)
4366 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4367 
4368    Output Parameter:
4369 .    mpimat - the parallel matrix generated
4370 
4371     Level: advanced
4372 
4373    Notes:
4374      The dimensions of the sequential matrix on each process MUST be the same.
4375      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4376      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
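
   Example usage (a minimal sketch; M, N, and nnz are placeholders for the global sizes
   and per-row nonzero counts of this process's contribution):
.vb
     Mat seqmat,mpimat;
     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,M,N,0,nnz,&seqmat);CHKERRQ(ierr);
     /* every process fills and assembles its own seqmat, all of the SAME dimensions M x N */
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
.ve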
4377 @*/
4378 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4379 {
4380   PetscErrorCode ierr;
4381   PetscMPIInt    size;
4382 
4383   PetscFunctionBegin;
4384   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4385   if (size == 1) {
4386     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4387     if (scall == MAT_INITIAL_MATRIX) {
4388       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4389     } else {
4390       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4391     }
4392     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4393     PetscFunctionReturn(0);
4394   }
4395   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4396   if (scall == MAT_INITIAL_MATRIX) {
4397     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4398   }
4399   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4400   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4401   PetscFunctionReturn(0);
4402 }
4403 
4404 #undef __FUNCT__
4405 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4406 /*@
4407      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4408           mlocal rows and N columns, where mlocal is the local row count obtained with MatGetLocalSize() and N is the global column count obtained
4409           with MatGetSize().
4410 
4411     Not Collective
4412 
4413    Input Parameters:
4414 +    A - the matrix
4415 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4416 
4417    Output Parameter:
4418 .    A_loc - the local sequential matrix generated
4419 
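   Example usage (a minimal sketch; the matrix obtained with MAT_INITIAL_MATRIX is
   refreshed in place with MAT_REUSE_MATRIX after the values of A change):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... change the values of A ... */
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
.ve
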
4420     Level: developer
4421 
4422 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4423 
4424 @*/
4425 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4426 {
4427   PetscErrorCode ierr;
4428   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4429   Mat_SeqAIJ     *mat,*a,*b;
4430   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4431   MatScalar      *aa,*ba,*cam;
4432   PetscScalar    *ca;
4433   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4434   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4435   PetscBool      match;
4436   MPI_Comm       comm;
4437   PetscMPIInt    size;
4438 
4439   PetscFunctionBegin;
4440   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4441   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4442   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4443   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4444   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4445 
4446   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4447   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4448   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4449   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4450   aa = a->a; ba = b->a;
4451   if (scall == MAT_INITIAL_MATRIX) {
4452     if (size == 1) {
4453       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4454       PetscFunctionReturn(0);
4455     }
4456 
4457     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4458     ci[0] = 0;
4459     for (i=0; i<am; i++) {
4460       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4461     }
4462     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4463     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4464     k    = 0;
4465     for (i=0; i<am; i++) {
4466       ncols_o = bi[i+1] - bi[i];
4467       ncols_d = ai[i+1] - ai[i];
4468       /* off-diagonal portion of A */
4469       for (jo=0; jo<ncols_o; jo++) {
4470         col = cmap[*bj];
4471         if (col >= cstart) break;
4472         cj[k]   = col; bj++;
4473         ca[k++] = *ba++;
4474       }
4475       /* diagonal portion of A */
4476       for (j=0; j<ncols_d; j++) {
4477         cj[k]   = cstart + *aj++;
4478         ca[k++] = *aa++;
4479       }
4480       /* off-diagonal portion of A */
4481       for (j=jo; j<ncols_o; j++) {
4482         cj[k]   = cmap[*bj++];
4483         ca[k++] = *ba++;
4484       }
4485     }
4486     /* put together the new matrix */
4487     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4488     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4489     /* Since these are PETSc arrays, change flags to free them as necessary. */
4490     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4491     mat->free_a  = PETSC_TRUE;
4492     mat->free_ij = PETSC_TRUE;
4493     mat->nonew   = 0;
4494   } else if (scall == MAT_REUSE_MATRIX) {
4495     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4496     ci = mat->i; cj = mat->j; cam = mat->a;
4497     for (i=0; i<am; i++) {
4498       /* off-diagonal portion of A */
4499       ncols_o = bi[i+1] - bi[i];
4500       for (jo=0; jo<ncols_o; jo++) {
4501         col = cmap[*bj];
4502         if (col >= cstart) break;
4503         *cam++ = *ba++; bj++;
4504       }
4505       /* diagonal portion of A */
4506       ncols_d = ai[i+1] - ai[i];
4507       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4508       /* off-diagonal portion of A */
4509       for (j=jo; j<ncols_o; j++) {
4510         *cam++ = *ba++; bj++;
4511       }
4512     }
4513   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4514   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4515   PetscFunctionReturn(0);
4516 }
4517 
4518 #undef __FUNCT__
4519 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4520 /*@C
4521      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4522 
4523     Not Collective
4524 
4525    Input Parameters:
4526 +    A - the matrix
4527 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4528 -    row, col - index sets of rows and columns to extract (or NULL)
4529 
4530    Output Parameter:
4531 .    A_loc - the local sequential matrix generated
4532 
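   Example usage (a minimal sketch; passing NULL for row and col lets the routine build
   the index sets of local rows and nonzero columns itself):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
.ve
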
4533     Level: developer
4534 
4535 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4536 
4537 @*/
4538 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4539 {
4540   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4541   PetscErrorCode ierr;
4542   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4543   IS             isrowa,iscola;
4544   Mat            *aloc;
4545   PetscBool      match;
4546 
4547   PetscFunctionBegin;
4548   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4549   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4550   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4551   if (!row) {
4552     start = A->rmap->rstart; end = A->rmap->rend;
4553     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4554   } else {
4555     isrowa = *row;
4556   }
4557   if (!col) {
4558     start = A->cmap->rstart;
4559     cmap  = a->garray;
4560     nzA   = a->A->cmap->n;
4561     nzB   = a->B->cmap->n;
4562     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4563     ncols = 0;
4564     for (i=0; i<nzB; i++) {
4565       if (cmap[i] < start) idx[ncols++] = cmap[i];
4566       else break;
4567     }
4568     imark = i;
4569     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4570     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4571     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4572   } else {
4573     iscola = *col;
4574   }
4575   if (scall != MAT_INITIAL_MATRIX) {
4576     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4577     aloc[0] = *A_loc;
4578   }
4579   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4580   *A_loc = aloc[0];
4581   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4582   if (!row) {
4583     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4584   }
4585   if (!col) {
4586     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4587   }
4588   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4589   PetscFunctionReturn(0);
4590 }
4591 
4592 #undef __FUNCT__
4593 #define __FUNCT__ "MatGetBrowsOfAcols"
4594 /*@C
4595     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4596 
4597     Collective on Mat
4598 
4599    Input Parameters:
4600 +    A,B - the matrices in mpiaij format
4601 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4602 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
4603 
4604    Output Parameter:
4605 +    rowb, colb - index sets of rows and columns of B to extract
4606 -    B_seq - the sequential matrix generated
4607 
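   Example usage (a minimal sketch; the rowb, colb, and B_seq produced by the first call
   are passed back in by the second call after the values of B change):
.vb
     IS  rowb,colb;
     Mat B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     /* ... change the values of B ... */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
.ve
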
4608     Level: developer
4609 
4610 @*/
4611 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
4612 {
4613   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4614   PetscErrorCode ierr;
4615   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4616   IS             isrowb,iscolb;
4617   Mat            *bseq=NULL;
4618 
4619   PetscFunctionBegin;
4620   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4621     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4622   }
4623   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4624 
4625   if (scall == MAT_INITIAL_MATRIX) {
4626     start = A->cmap->rstart;
4627     cmap  = a->garray;
4628     nzA   = a->A->cmap->n;
4629     nzB   = a->B->cmap->n;
4630     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4631     ncols = 0;
4632     for (i=0; i<nzB; i++) {  /* row < local row index */
4633       if (cmap[i] < start) idx[ncols++] = cmap[i];
4634       else break;
4635     }
4636     imark = i;
4637     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4638     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4639     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4640     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4641   } else {
4642     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4643     isrowb  = *rowb; iscolb = *colb;
4644     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
4645     bseq[0] = *B_seq;
4646   }
4647   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4648   *B_seq = bseq[0];
4649   ierr   = PetscFree(bseq);CHKERRQ(ierr);
4650   if (!rowb) {
4651     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
4652   } else {
4653     *rowb = isrowb;
4654   }
4655   if (!colb) {
4656     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
4657   } else {
4658     *colb = iscolb;
4659   }
4660   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4661   PetscFunctionReturn(0);
4662 }
4663 
4664 #undef __FUNCT__
4665 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
4666 /*
4667     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4668     of the OFF-DIAGONAL portion of local A
4669 
4670     Collective on Mat
4671 
4672    Input Parameters:
4673 +    A,B - the matrices in mpiaij format
4674 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4675 
4676    Output Parameter:
4677 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
4678 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
4679 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
4680 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
4681 
4682     Level: developer
4683 
4684 */
4685 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4686 {
4687   VecScatter_MPI_General *gen_to,*gen_from;
4688   PetscErrorCode         ierr;
4689   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4690   Mat_SeqAIJ             *b_oth;
4691   VecScatter             ctx =a->Mvctx;
4692   MPI_Comm               comm;
4693   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4694   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4695   PetscScalar            *rvalues,*svalues;
4696   MatScalar              *b_otha,*bufa,*bufA;
4697   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4698   MPI_Request            *rwaits = NULL,*swaits = NULL;
4699   MPI_Status             *sstatus,rstatus;
4700   PetscMPIInt            jj,size;
4701   PetscInt               *cols,sbs,rbs;
4702   PetscScalar            *vals;
4703 
4704   PetscFunctionBegin;
4705   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4706   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4707 
4708   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
4709     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4710   }
4711   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4712   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4713 
4714   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4715   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4716   rvalues  = gen_from->values; /* holds the length of receiving row */
4717   svalues  = gen_to->values;   /* holds the length of sending row */
4718   nrecvs   = gen_from->n;
4719   nsends   = gen_to->n;
4720 
4721   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
4722   srow    = gen_to->indices;    /* local row index to be sent */
4723   sstarts = gen_to->starts;
4724   sprocs  = gen_to->procs;
4725   sstatus = gen_to->sstatus;
4726   sbs     = gen_to->bs;
4727   rstarts = gen_from->starts;
4728   rprocs  = gen_from->procs;
4729   rbs     = gen_from->bs;
4730 
4731   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4732   if (scall == MAT_INITIAL_MATRIX) {
4733     /* i-array */
4734     /*---------*/
4735     /*  post receives */
4736     for (i=0; i<nrecvs; i++) {
4737       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4738       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4739       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4740     }
4741 
4742     /* pack the outgoing message */
4743     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
4744 
4745     sstartsj[0] = 0;
4746     rstartsj[0] = 0;
4747     len         = 0; /* total length of j or a array to be sent */
4748     k           = 0;
4749     for (i=0; i<nsends; i++) {
4750       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4751       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
4752       for (j=0; j<nrows; j++) {
4753         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4754         for (l=0; l<sbs; l++) {
4755           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
4756 
4757           rowlen[j*sbs+l] = ncols;
4758 
4759           len += ncols;
4760           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
4761         }
4762         k++;
4763       }
4764       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4765 
4766       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4767     }
4768     /* recvs and sends of i-array are completed */
4769     i = nrecvs;
4770     while (i--) {
4771       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4772     }
4773     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4774 
4775     /* allocate buffers for sending j and a arrays */
4776     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
4777     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
4778 
4779     /* create i-array of B_oth */
4780     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
4781 
4782     b_othi[0] = 0;
4783     len       = 0; /* total length of j or a array to be received */
4784     k         = 0;
4785     for (i=0; i<nrecvs; i++) {
4786       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4787       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4788       for (j=0; j<nrows; j++) {
4789         b_othi[k+1] = b_othi[k] + rowlen[j];
4790         len        += rowlen[j]; k++;
4791       }
4792       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4793     }
4794 
4795     /* allocate space for the j and a arrays of B_oth */
4796     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
4797     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
4798 
4799     /* j-array */
4800     /*---------*/
4801     /*  post receives of j-array */
4802     for (i=0; i<nrecvs; i++) {
4803       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4804       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4805     }
4806 
4807     /* pack the outgoing message j-array */
4808     k = 0;
4809     for (i=0; i<nsends; i++) {
4810       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4811       bufJ  = bufj+sstartsj[i];
4812       for (j=0; j<nrows; j++) {
4813         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4814         for (ll=0; ll<sbs; ll++) {
4815           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4816           for (l=0; l<ncols; l++) {
4817             *bufJ++ = cols[l];
4818           }
4819           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
4820         }
4821       }
4822       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4823     }
4824 
4825     /* recvs and sends of j-array are completed */
4826     i = nrecvs;
4827     while (i--) {
4828       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4829     }
4830     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4831   } else if (scall == MAT_REUSE_MATRIX) {
4832     sstartsj = *startsj_s;
4833     rstartsj = *startsj_r;
4834     bufa     = *bufa_ptr;
4835     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4836     b_otha   = b_oth->a;
4837   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
4838 
4839   /* a-array */
4840   /*---------*/
4841   /*  post receives of a-array */
4842   for (i=0; i<nrecvs; i++) {
4843     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4844     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4845   }
4846 
4847   /* pack the outgoing message a-array */
4848   k = 0;
4849   for (i=0; i<nsends; i++) {
4850     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4851     bufA  = bufa+sstartsj[i];
4852     for (j=0; j<nrows; j++) {
4853       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
4854       for (ll=0; ll<sbs; ll++) {
4855         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4856         for (l=0; l<ncols; l++) {
4857           *bufA++ = vals[l];
4858         }
4859         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
4860       }
4861     }
4862     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4863   }
4864   /* recvs and sends of a-array are completed */
4865   i = nrecvs;
4866   while (i--) {
4867     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4868   }
4869   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4870   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4871 
4872   if (scall == MAT_INITIAL_MATRIX) {
4873     /* put together the new matrix */
4874     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4875 
4876     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4877     /* Since these are PETSc arrays, change flags to free them as necessary. */
4878     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
4879     b_oth->free_a  = PETSC_TRUE;
4880     b_oth->free_ij = PETSC_TRUE;
4881     b_oth->nonew   = 0;
4882 
4883     ierr = PetscFree(bufj);CHKERRQ(ierr);
4884     if (!startsj_s || !bufa_ptr) {
4885       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4886       ierr = PetscFree(bufa);CHKERRQ(ierr);
4887     } else {
4888       *startsj_s = sstartsj;
4889       *startsj_r = rstartsj;
4890       *bufa_ptr  = bufa;
4891     }
4892   }
4893   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4894   PetscFunctionReturn(0);
4895 }
4896 
4897 #undef __FUNCT__
4898 #define __FUNCT__ "MatGetCommunicationStructs"
4899 /*@C
4900   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4901 
4902   Not Collective
4903 
4904   Input Parameters:
4905 . A - The matrix in mpiaij format
4906 
4907   Output Parameter:
4908 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4909 . colmap - A map from global column index to local index into lvec
4910 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4911 
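  Example usage (a minimal sketch; the type of colmap depends on whether PETSC_USE_CTABLE
  is defined in the PETSc configuration):
.vb
    Vec        lvec;
    VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
#else
    PetscInt   *colmap;
#endif
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
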
4912   Level: developer
4913 
4914 @*/
4915 #if defined(PETSC_USE_CTABLE)
4916 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4917 #else
4918 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4919 #endif
4920 {
4921   Mat_MPIAIJ *a;
4922 
4923   PetscFunctionBegin;
4924   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4925   PetscValidPointer(lvec, 2);
4926   PetscValidPointer(colmap, 3);
4927   PetscValidPointer(multScatter, 4);
4928   a = (Mat_MPIAIJ*) A->data;
4929   if (lvec) *lvec = a->lvec;
4930   if (colmap) *colmap = a->colmap;
4931   if (multScatter) *multScatter = a->Mvctx;
4932   PetscFunctionReturn(0);
4933 }
4934 
4935 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
4936 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
4937 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
4938 #if defined(PETSC_HAVE_ELEMENTAL)
4939 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
4940 #endif
4941 
4942 #undef __FUNCT__
4943 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4944 /*
4945     Computes (B'*A')' since computing B*A directly is untenable
4946 
4947                n                       p                          p
4948         (              )       (              )         (                  )
4949       m (      A       )  *  n (       B      )   =   m (         C        )
4950         (              )       (              )         (                  )
4951 
4952 */
4953 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4954 {
4955   PetscErrorCode ierr;
4956   Mat            At,Bt,Ct;
4957 
4958   PetscFunctionBegin;
4959   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4960   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4961   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4962   ierr = MatDestroy(&At);CHKERRQ(ierr);
4963   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
4964   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4965   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
4966   PetscFunctionReturn(0);
4967 }
4968 
4969 #undef __FUNCT__
4970 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4971 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4972 {
4973   PetscErrorCode ierr;
4974   PetscInt       m=A->rmap->n,n=B->cmap->n;
4975   Mat            Cmat;
4976 
4977   PetscFunctionBegin;
4978   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4979   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
4980   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4981   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
4982   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4983   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
4984   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4985   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4986 
4987   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
4988 
4989   *C = Cmat;
4990   PetscFunctionReturn(0);
4991 }
4992 
4993 /* ----------------------------------------------------------------*/
4994 #undef __FUNCT__
4995 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4996 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4997 {
4998   PetscErrorCode ierr;
4999 
5000   PetscFunctionBegin;
5001   if (scall == MAT_INITIAL_MATRIX) {
5002     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5003     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5004     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5005   }
5006   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5007   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5008   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5009   PetscFunctionReturn(0);
5010 }
5011 
5012 /*MC
5013    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5014 
5015    Options Database Keys:
5016 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5017 
5018   Level: beginner
5019 
5020 .seealso: MatCreateAIJ()
5021 M*/
5022 
5023 #undef __FUNCT__
5024 #define __FUNCT__ "MatCreate_MPIAIJ"
5025 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5026 {
5027   Mat_MPIAIJ     *b;
5028   PetscErrorCode ierr;
5029   PetscMPIInt    size;
5030 
5031   PetscFunctionBegin;
5032   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5033 
5034   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5035   B->data       = (void*)b;
5036   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5037   B->assembled  = PETSC_FALSE;
5038   B->insertmode = NOT_SET_VALUES;
5039   b->size       = size;
5040 
5041   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5042 
5043   /* build cache for off array entries formed */
5044   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5045 
5046   b->donotstash  = PETSC_FALSE;
5047   b->colmap      = 0;
5048   b->garray      = 0;
5049   b->roworiented = PETSC_TRUE;
5050 
5051   /* stuff used for matrix vector multiply */
5052   b->lvec  = NULL;
5053   b->Mvctx = NULL;
5054 
5055   /* stuff for MatGetRow() */
5056   b->rowindices   = 0;
5057   b->rowvalues    = 0;
5058   b->getrowactive = PETSC_FALSE;
5059 
5060   /* flexible pointer used in CUSP/CUSPARSE classes */
5061   b->spptr = NULL;
5062 
5063   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5064   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5065   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5066   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5067   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5068   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5069   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5070   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5071   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5072   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5073   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5074 #if defined(PETSC_HAVE_ELEMENTAL)
5075   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5076 #endif
5077   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5078   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5079   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5080   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5081   PetscFunctionReturn(0);
5082 }
5083 
5084 #undef __FUNCT__
5085 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5086 /*@C
5087      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5088          and "off-diagonal" part of the matrix in CSR format.
5089 
5090    Collective on MPI_Comm
5091 
5092    Input Parameters:
5093 +  comm - MPI communicator
5094 .  m - number of local rows (Cannot be PETSC_DECIDE)
5095 .  n - This value should be the same as the local size used in creating the
5096        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5097        calculated if N is given). For square matrices n is almost always m.
5098 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5099 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5100 .   i - row indices for "diagonal" portion of matrix
5101 .   j - column indices
5102 .   a - matrix values
5103 .   oi - row indices for "off-diagonal" portion of matrix
5104 .   oj - column indices
5105 -   oa - matrix values
5106 
5107    Output Parameter:
5108 .   mat - the matrix
5109 
5110    Level: advanced
5111 
5112    Notes:
5113        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5114        must free the arrays once the matrix has been destroyed, and not before.
5115 
5116        The i, j, oi, and oj indices are 0 based
5117 
5118        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5119 
5120        This sets local rows and cannot be used to set off-processor values.
5121 
5122        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5123        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5124        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5125        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5126        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5127        communication if it is known that only local entries will be set.
5128 
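       Example usage (an illustrative sketch; the values and the 2-process layout below are made up
       for this manual page, not taken from the PETSc examples): consider the 4 by 4 matrix
.vb
                1  0  |  0  2
                0  3  |  4  0
                -------------
                5  0  |  6  0
                0  7  |  0  8
.ve
       with each process owning 2 rows and 2 columns of the "diagonal" block. Process 0 would pass
.vb
        i  = {0,1,2};  j  = {0,1};  a  = {1,3};     (diagonal block, local column numbering)
        oi = {0,1,2};  oj = {3,2};  oa = {2,4};     (off-diagonal block, global column numbering)
.ve
       process 1 would pass
.vb
        i  = {0,1,2};  j  = {0,1};  a  = {6,8};
        oi = {0,1,2};  oj = {0,1};  oa = {5,7};
.ve
       and both would then call
.vb
        MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);
.ve
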
5129 .keywords: matrix, aij, compressed row, sparse, parallel
5130 
5131 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5132           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5133 @*/
5134 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5135 {
5136   PetscErrorCode ierr;
5137   Mat_MPIAIJ     *maij;
5138 
5139   PetscFunctionBegin;
5140   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5141   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5142   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5143   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5144   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5145   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5146   maij = (Mat_MPIAIJ*) (*mat)->data;
5147 
5148   (*mat)->preallocated = PETSC_TRUE;
5149 
5150   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5151   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5152 
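  /* A holds the local "diagonal" block (m x n, local column indices); B holds the local
     "off-diagonal" block, created with the full global column width N and global column indices */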
5153   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5154   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5155 
5156   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5157   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5158   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5159   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5160 
5161   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5162   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5163   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5164   PetscFunctionReturn(0);
5165 }
5166 
5167 /*
5168     Special version of MatSetValues() for direct calls from Fortran; it repeats the logic of MatSetValues_MPIAIJ()
5169 */
5170 #include <petsc/private/fortranimpl.h>
5171 
5172 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5173 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5174 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5175 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5176 #endif
5177 
5178 /* Redefine these macros so they can be used in the void function below: on error, abort instead of returning an error code */
5179 #undef CHKERRQ
5180 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5181 #undef SETERRQ2
5182 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5183 #undef SETERRQ3
5184 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5185 #undef SETERRQ
5186 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5187 
5188 #undef __FUNCT__
5189 #define __FUNCT__ "matsetvaluesmpiaij_"
5190 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5191 {
5192   Mat            mat  = *mmat;
5193   PetscInt       m    = *mm, n = *mn;
5194   InsertMode     addv = *maddv;
5195   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5196   PetscScalar    value;
5197   PetscErrorCode ierr;
5198 
5199   MatCheckPreallocated(mat,1);
5200   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5201 
5202 #if defined(PETSC_USE_DEBUG)
5203   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5204 #endif
5205   {
5206     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5207     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5208     PetscBool roworiented = aij->roworiented;
5209 
5210     /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5211     Mat        A                 = aij->A;
5212     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5213     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5214     MatScalar  *aa               = a->a;
5215     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5216     Mat        B                 = aij->B;
5217     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5218     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5219     MatScalar  *ba               = b->a;
5220 
5221     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5222     PetscInt  nonew = a->nonew;
5223     MatScalar *ap1,*ap2;
5224 
5225     PetscFunctionBegin;
5226     for (i=0; i<m; i++) {
5227       if (im[i] < 0) continue;
5228 #if defined(PETSC_USE_DEBUG)
5229       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5230 #endif
5231       if (im[i] >= rstart && im[i] < rend) {
5232         row      = im[i] - rstart;
5233         lastcol1 = -1;
5234         rp1      = aj + ai[row];
5235         ap1      = aa + ai[row];
5236         rmax1    = aimax[row];
5237         nrow1    = ailen[row];
5238         low1     = 0;
5239         high1    = nrow1;
5240         lastcol2 = -1;
5241         rp2      = bj + bi[row];
5242         ap2      = ba + bi[row];
5243         rmax2    = bimax[row];
5244         nrow2    = bilen[row];
5245         low2     = 0;
5246         high2    = nrow2;
5247 
5248         for (j=0; j<n; j++) {
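          /* v is logically m x n: row-oriented storage uses v[i*n+j], column-oriented uses v[i+j*m] */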
5249           if (roworiented) value = v[i*n+j];
5250           else value = v[i+j*m];
5251           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5252           if (in[j] >= cstart && in[j] < cend) {
5253             col = in[j] - cstart;
5254             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5255           } else if (in[j] < 0) continue;
5256 #if defined(PETSC_USE_DEBUG)
5257           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5258 #endif
5259           else {
5260             if (mat->was_assembled) {
5261               if (!aij->colmap) {
5262                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5263               }
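              /* colmap maps a global column index to (local column index in B) + 1; a lookup result of 0,
                 i.e. col < 0 after subtracting 1, means the column is not yet present in the off-diagonal block */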
5264 #if defined(PETSC_USE_CTABLE)
5265               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5266               col--;
5267 #else
5268               col = aij->colmap[in[j]] - 1;
5269 #endif
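              /* new off-process column while new nonzero locations are allowed: disassemble B (return it
                 to global column numbering) so that the entry can be inserted */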
5270               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5271                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5272                 col  =  in[j];
5273                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5274                 B     = aij->B;
5275                 b     = (Mat_SeqAIJ*)B->data;
5276                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5277                 ba    = b->a;   /* refresh ba before forming ap2: MatDisAssemble_MPIAIJ() replaced B and its arrays */
5278                 rp2   = bj + bi[row];
5279                 ap2   = ba + bi[row];
5280                 rmax2 = bimax[row];
5281                 nrow2 = bilen[row];
5282                 low2  = 0;
5283                 high2 = nrow2;
5284                 bm    = aij->B->rmap->n;
5285               }
5286             } else col = in[j];
5287             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5288           }
5289         }
5290       } else if (!aij->donotstash) {
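        /* off-process row: stash the values; they are communicated to the owning process during MatAssemblyBegin()/MatAssemblyEnd() */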
5291         if (roworiented) {
5292           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5293         } else {
5294           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5295         }
5296       }
5297     }
5298   }
5299   PetscFunctionReturnVoid();
5300 }
5301 
5302