xref: /petsc/src/ksp/pc/impls/gamg/util.c (revision 3c859ba3a04a72e8efcb87bc7ffc046a6cbab413)
/*
 GAMG geometric-algebraic multigrid PC - Mark Adams 2011
 */
#include <petsc/private/matimpl.h>
#include <../src/ksp/pc/impls/gamg/gamg.h>           /*I "petscpc.h" I*/
#include <petsc/private/kspimpl.h>

/*
   Produces a set of block column indices of the matrix row, one for each block represented in the original row

   n - the number of block indices in cc[]
   cc - the block indices (must be large enough to contain the indices)
*/
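/*
   Example (an illustration, assuming MatGetRow() returns column indices in
   ascending order, as it does for AIJ): with bs=2 and scalar column indices
   {0,1,4,5,6}, the collapsed block indices are cc[] = {0,2,3} and *n = 3.
*/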
static inline PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc)
{
  PetscInt       cnt = -1,nidx,j;
  const PetscInt *idx;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetRow(Amat,row,&nidx,&idx,NULL);CHKERRQ(ierr);
  if (nidx) {
    cnt = 0;
    cc[cnt] = idx[0]/bs;
    for (j=1; j<nidx; j++) {
      if (cc[cnt] < idx[j]/bs) cc[++cnt] = idx[j]/bs;
    }
  }
  ierr = MatRestoreRow(Amat,row,&nidx,&idx,NULL);CHKERRQ(ierr);
  *n = cnt+1;
  PetscFunctionReturn(0);
}

/*
    Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows

    ncollapsed - the number of block indices
    collapsed - the block indices (must be large enough to contain the indices)
*/
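/*
   Workspace note (inferred from the merge pattern below, not stated in the
   original): w0, w1, and w2 must each be large enough to hold every distinct
   block column of the bs merged rows, since cprev/merged are swapped after
   every merge. The callers in PCGAMGCreateGraph size them as
   PetscMin(nloc,bs*max_row_nnz).
*/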
static inline PetscErrorCode MatCollapseRows(Mat Amat,PetscInt start,PetscInt bs,PetscInt *w0,PetscInt *w1,PetscInt *w2,PetscInt *ncollapsed,PetscInt **collapsed)
{
  PetscInt       i,nprev,*cprev = w0,ncur = 0,*ccur = w1,*merged = w2,*cprevtmp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCollapseRow(Amat,start,bs,&nprev,cprev);CHKERRQ(ierr);
  for (i=start+1; i<start+bs; i++) {
    ierr  = MatCollapseRow(Amat,i,bs,&ncur,ccur);CHKERRQ(ierr);
    ierr  = PetscMergeIntArray(nprev,cprev,ncur,ccur,&nprev,&merged);CHKERRQ(ierr);
    cprevtmp = cprev; cprev = merged; merged = cprevtmp;
  }
  *ncollapsed = nprev;
  if (collapsed) *collapsed = cprev;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   PCGAMGCreateGraph - create simple scaled scalar graph from matrix

 Input Parameter:
 . Amat - matrix
 Output Parameter:
 . a_Gmat - output scalar graph (symmetric?)
 */
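/*
   Sketch of the collapse this routine performs (implied by the code below,
   illustrated for bs=2): each 2x2 block B of Amat contributes one scalar entry
   g = |B(0,0)| + |B(0,1)| + |B(1,0)| + |B(1,1)|, an entrywise 1-norm of the
   block, at the corresponding block row/column of Gmat.
*/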
PetscErrorCode PCGAMGCreateGraph(Mat Amat, Mat *a_Gmat)
{
  PetscErrorCode ierr;
  PetscInt       Istart,Iend,Ii,jj,kk,ncols,nloc,NN,MM,bs;
  MPI_Comm       comm;
  Mat            Gmat;
  PetscBool      ismpiaij,isseqaij;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat, &Istart, &Iend);CHKERRQ(ierr);
  ierr = MatGetSize(Amat, &MM, &NN);CHKERRQ(ierr);
  ierr = MatGetBlockSize(Amat, &bs);CHKERRQ(ierr);
  nloc = (Iend-Istart)/bs;

  ierr = PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
  ierr = PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij);CHKERRQ(ierr);
  PetscCheckFalse(!isseqaij && !ismpiaij,PETSC_COMM_WORLD,PETSC_ERR_USER,"Require (MPI)AIJ matrix type");
  ierr = PetscLogEventBegin(petsc_gamg_setup_events[GRAPH],0,0,0,0);CHKERRQ(ierr);

  /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
  /* One solution is to provide a new API, MatAIJGetCollapsedAIJ, that each class
     can implement efficiently */
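  /* A hypothetical signature for that API (not in PETSc at this revision, just
     a sketch of the proposal above):
       PetscErrorCode MatAIJGetCollapsedAIJ(Mat A,PetscInt bs,Mat *Gmat);
     so that each matrix class could build the block-collapsed scalar graph
     directly, e.g. on the device. */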
  ierr = MatViewFromOptions(Amat, NULL, "-g_mat_view");CHKERRQ(ierr);
  if (bs > 1 && (isseqaij || ((Mat_MPIAIJ*)Amat->data)->garray)) {
    PetscInt  *d_nnz, *o_nnz;
    Mat       a, b, c;
    MatScalar *aa,val,AA[4096];
    PetscInt  *aj,*ai,AJ[4096],nc;
    ierr = PetscInfo(Amat,"New bs>1 PCGAMGCreateGraph. nloc=%D\n",nloc);CHKERRQ(ierr);
    if (isseqaij) {
      a = Amat; b = NULL;
    } else {
      Mat_MPIAIJ *d = (Mat_MPIAIJ*)Amat->data;
      a = d->A; b = d->B;
    }
    ierr = PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz);CHKERRQ(ierr);
    for (c=a, kk=0; c && kk<2; c=b, kk++) {
      PetscInt       *nnz = (c==a) ? d_nnz : o_nnz, nmax=0;
      const PetscInt *cols;
      for (PetscInt brow=0,jj,ok=1,j0; brow < nloc*bs; brow += bs) { // block rows
        ierr = MatGetRow(c,brow,&jj,&cols,NULL);CHKERRQ(ierr);
        nnz[brow/bs] = jj/bs;
        if (jj%bs) ok = 0;
        if (cols) j0 = cols[0];
        else j0 = -1;
        ierr = MatRestoreRow(c,brow,&jj,&cols,NULL);CHKERRQ(ierr);
        if (nnz[brow/bs]>nmax) nmax = nnz[brow/bs];
        for (PetscInt ii=1; ii < bs && nnz[brow/bs] ; ii++) { // check for non-dense blocks
          ierr = MatGetRow(c,brow+ii,&jj,&cols,NULL);CHKERRQ(ierr);
          if (jj%bs) ok = 0;
          if (j0 != cols[0]) ok = 0;
          if (nnz[brow/bs] != jj/bs) ok = 0;
          ierr = MatRestoreRow(c,brow+ii,&jj,&cols,NULL);CHKERRQ(ierr);
        }
        if (!ok) {
          ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
          goto old_bs;
        }
      }
      PetscCheck(nmax<4096,PETSC_COMM_SELF,PETSC_ERR_USER,"Row of %D block columns overflows fixed buffer size %D.",nmax,4096);
    }
    ierr = MatCreate(comm, &Gmat);CHKERRQ(ierr);
    ierr = MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(Gmat, 1, 1);CHKERRQ(ierr);
    ierr = MatSetType(Gmat, MATAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(Gmat,0,d_nnz);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
    // diag
    for (PetscInt brow=0,n,grow; brow < nloc*bs; brow += bs) { // block rows
      Mat_SeqAIJ *aseq  = (Mat_SeqAIJ*)a->data;
      ai = aseq->i;
      n  = ai[brow+1] - ai[brow];
      aj = aseq->j + ai[brow];
      for (int k=0; k<n; k += bs) { // block columns
        AJ[k/bs] = aj[k]/bs + Istart/bs; // diag starts at (Istart,Istart)
        val = 0;
        for (int ii=0; ii<bs; ii++) { // rows in block
          aa = aseq->a + ai[brow+ii] + k;
          for (int jj=0; jj<bs; jj++) { // columns in block
            val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
          }
        }
        AA[k/bs] = val;
      }
      grow = Istart/bs + brow/bs;
      ierr = MatSetValues(Gmat,1,&grow,n/bs,AJ,AA,INSERT_VALUES);CHKERRQ(ierr);
    }
    // off-diag
    if (ismpiaij) {
      Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)Amat->data;
      const PetscScalar *vals;
      const PetscInt    *cols, *garray = aij->garray;
      PetscCheck(garray,PETSC_COMM_SELF,PETSC_ERR_USER,"No garray ?");
      for (PetscInt brow=0,grow; brow < nloc*bs; brow += bs) { // block rows
        ierr = MatGetRow(b,brow,&ncols,&cols,NULL);CHKERRQ(ierr);
        for (int k=0,cidx=0; k<ncols; k += bs,cidx++) {
          AA[k/bs] = 0;
          AJ[cidx] = garray[cols[k]]/bs;
        }
        nc = ncols/bs;
        ierr = MatRestoreRow(b,brow,&ncols,&cols,NULL);CHKERRQ(ierr);
        for (int ii=0; ii<bs; ii++) { // rows in block
          ierr = MatGetRow(b,brow+ii,&ncols,&cols,&vals);CHKERRQ(ierr);
          for (int k=0; k<ncols; k += bs) {
            for (int jj=0; jj<bs; jj++) { // cols in block
              AA[k/bs] += PetscAbs(PetscRealPart(vals[k+jj]));
            }
          }
          ierr = MatRestoreRow(b,brow+ii,&ncols,&cols,&vals);CHKERRQ(ierr);
        }
        grow = Istart/bs + brow/bs;
        ierr = MatSetValues(Gmat,1,&grow,nc,AJ,AA,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
    ierr = MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatViewFromOptions(Gmat, NULL, "-g_mat_view");CHKERRQ(ierr);
  } else if (bs > 1) {
    const PetscScalar *vals;
    const PetscInt    *idx;
    PetscInt          *d_nnz, *o_nnz,*w0,*w1,*w2;

old_bs:
    /*
       Determine the preallocation needed for the scalar matrix derived from the vector matrix.
    */

    ierr = PetscInfo(Amat,"OLD bs>1 PCGAMGCreateGraph\n");CHKERRQ(ierr);
    ierr = PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
    ierr = PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij);CHKERRQ(ierr);
    ierr = PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz);CHKERRQ(ierr);

    if (isseqaij) {
      PetscInt max_d_nnz;

      /*
          Determine exact preallocation count for (sequential) scalar matrix
      */
      ierr = MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz);CHKERRQ(ierr);
      max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
      ierr = PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2);CHKERRQ(ierr);
      for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) {
        ierr = MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL);CHKERRQ(ierr);
      }
      ierr = PetscFree3(w0,w1,w2);CHKERRQ(ierr);

    } else if (ismpiaij) {
      Mat            Daij,Oaij;
      const PetscInt *garray;
      PetscInt       max_d_nnz;

      ierr = MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray);CHKERRQ(ierr);

      /*
          Determine exact preallocation count for diagonal block portion of scalar matrix
      */
      ierr = MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz);CHKERRQ(ierr);
      max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
      ierr = PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2);CHKERRQ(ierr);
      for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
        ierr = MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL);CHKERRQ(ierr);
      }
      ierr = PetscFree3(w0,w1,w2);CHKERRQ(ierr);

      /*
         Overestimate (usually grossly so) the preallocation count for the off-diagonal portion of the scalar matrix
      */
      for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
        o_nnz[jj] = 0;
        for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */
          ierr = MatGetRow(Oaij,Ii+kk,&ncols,NULL,NULL);CHKERRQ(ierr);
          o_nnz[jj] += ncols;
          ierr = MatRestoreRow(Oaij,Ii+kk,&ncols,NULL,NULL);CHKERRQ(ierr);
        }
        if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc;
      }

    } else SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_USER,"Require AIJ matrix type");

    /* get scalar copy (norms) of matrix */
    ierr = MatCreate(comm, &Gmat);CHKERRQ(ierr);
    ierr = MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(Gmat, 1, 1);CHKERRQ(ierr);
    ierr = MatSetType(Gmat, MATAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(Gmat,0,d_nnz);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

    for (Ii = Istart; Ii < Iend; Ii++) {
      PetscInt dest_row = Ii/bs;
      ierr = MatGetRow(Amat,Ii,&ncols,&idx,&vals);CHKERRQ(ierr);
      for (jj=0; jj<ncols; jj++) {
        PetscInt    dest_col = idx[jj]/bs;
        PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
        ierr = MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES);CHKERRQ(ierr);
      }
      ierr = MatRestoreRow(Amat,Ii,&ncols,&idx,&vals);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatViewFromOptions(Gmat, NULL, "-g_mat_view");CHKERRQ(ierr);
  } else {
    /* just copy scalar matrix - abs() not taken here but scaled later */
    ierr = MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat);CHKERRQ(ierr);
  }
  ierr = MatPropagateSymmetryOptions(Amat, Gmat);CHKERRQ(ierr);

  ierr = PetscLogEventEnd(petsc_gamg_setup_events[GRAPH],0,0,0,0);CHKERRQ(ierr);

  *a_Gmat = Gmat;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*@C
   PCGAMGFilterGraph - filter the graph (remove zero and possibly small values) and make it symmetric if requested

   Collective on Mat

   Input Parameters:
+   a_Gmat - the graph
.   vfilter - threshold parameter [0,1)
-   symm - make the result symmetric

   Level: developer

   Notes:
    This is called before the graph coarseners are called.

.seealso: PCGAMGSetThreshold()
@*/
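/*
   A minimal usage sketch (the 0.01 threshold is illustrative only):

     Mat G;
     ierr = PCGAMGCreateGraph(Amat,&G);CHKERRQ(ierr);
     ierr = PCGAMGFilterGraph(&G,0.01,PETSC_TRUE);CHKERRQ(ierr); // drop scaled entries <= 0.01, symmetrize

   On return G points at the new filtered graph; the graph passed in has been destroyed.
*/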
PetscErrorCode PCGAMGFilterGraph(Mat *a_Gmat,PetscReal vfilter,PetscBool symm)
{
  PetscErrorCode    ierr;
  PetscInt          Istart,Iend,Ii,jj,ncols,nnz0,nnz1, NN, MM, nloc;
  PetscMPIInt       rank;
  Mat               Gmat  = *a_Gmat, tGmat;
  MPI_Comm          comm;
  const PetscScalar *vals;
  const PetscInt    *idx;
  PetscInt          *d_nnz, *o_nnz;
  Vec               diag;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET16],0,0,0,0);CHKERRQ(ierr);

  /* TODO GPU: optimization proposal, each class provides fast implementation of this
     procedure via MatAbs API */
  if (vfilter < 0.0 && !symm) {
    /* Just use the provided matrix as the graph but make all values positive */
    MatInfo     info;
    PetscScalar *avals;
    PetscBool   isaij,ismpiaij;
    ierr = PetscObjectBaseTypeCompare((PetscObject)Gmat,MATSEQAIJ,&isaij);CHKERRQ(ierr);
    ierr = PetscObjectBaseTypeCompare((PetscObject)Gmat,MATMPIAIJ,&ismpiaij);CHKERRQ(ierr);
    PetscCheckFalse(!isaij && !ismpiaij,PETSC_COMM_WORLD,PETSC_ERR_USER,"Require (MPI)AIJ matrix type");
    if (isaij) {
      ierr = MatGetInfo(Gmat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatSeqAIJGetArray(Gmat,&avals);CHKERRQ(ierr);
      for (jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
      ierr = MatSeqAIJRestoreArray(Gmat,&avals);CHKERRQ(ierr);
    } else {
      Mat_MPIAIJ *aij = (Mat_MPIAIJ*)Gmat->data;
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatSeqAIJGetArray(aij->A,&avals);CHKERRQ(ierr);
      for (jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
      ierr = MatSeqAIJRestoreArray(aij->A,&avals);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatSeqAIJGetArray(aij->B,&avals);CHKERRQ(ierr);
      for (jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
      ierr = MatSeqAIJRestoreArray(aij->B,&avals);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET16],0,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
               Also, if the matrix is symmetric, can we skip this
               operation? It can be very expensive on large matrices. */
  ierr = PetscObjectGetComm((PetscObject)Gmat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MatGetOwnershipRange(Gmat, &Istart, &Iend);CHKERRQ(ierr);
  nloc = Iend - Istart;
  ierr = MatGetSize(Gmat, &MM, &NN);CHKERRQ(ierr);

  if (symm) {
    Mat matTrans;
    ierr = MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans);CHKERRQ(ierr);
    ierr = MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
    ierr = MatDestroy(&matTrans);CHKERRQ(ierr);
  }

  /* scale Gmat so all values lie between -1 and 1 */
  ierr = MatCreateVecs(Gmat, &diag, NULL);CHKERRQ(ierr);
  ierr = MatGetDiagonal(Gmat, diag);CHKERRQ(ierr);
  ierr = VecReciprocal(diag);CHKERRQ(ierr);
  ierr = VecSqrtAbs(diag);CHKERRQ(ierr);
  ierr = MatDiagonalScale(Gmat, diag, diag);CHKERRQ(ierr);
  ierr = VecDestroy(&diag);CHKERRQ(ierr);
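  /* Net effect of the scaling above: Gmat(i,j) <- Gmat(i,j)/sqrt(|Gmat(i,i)|*|Gmat(j,j)|),
     so diagonal entries become +-1 and (for SPD graphs) off-diagonals fall in [-1,1];
     the vfilter threshold applied below is therefore relative, not absolute. */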

  /* Determine upper bound on nonzeros needed in new filtered matrix */
  ierr = PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz);CHKERRQ(ierr);
  for (Ii = Istart, jj = 0; Ii < Iend; Ii++, jj++) {
    ierr      = MatGetRow(Gmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr);
    d_nnz[jj] = ncols;
    o_nnz[jj] = ncols;
    ierr      = MatRestoreRow(Gmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr);
    if (d_nnz[jj] > nloc) d_nnz[jj] = nloc;
    if (o_nnz[jj] > (MM-nloc)) o_nnz[jj] = MM - nloc;
  }
  ierr = MatCreate(comm, &tGmat);CHKERRQ(ierr);
  ierr = MatSetSizes(tGmat,nloc,nloc,MM,MM);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(tGmat, 1, 1);CHKERRQ(ierr);
  ierr = MatSetType(tGmat, MATAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(tGmat,0,d_nnz);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(tGmat,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = MatSetOption(tGmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  for (Ii = Istart, nnz0 = nnz1 = 0; Ii < Iend; Ii++) {
    ierr = MatGetRow(Gmat,Ii,&ncols,&idx,&vals);CHKERRQ(ierr);
    for (jj=0; jj<ncols; jj++,nnz0++) {
      PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
      if (PetscRealPart(sv) > vfilter) {
        nnz1++;
        ierr = MatSetValues(tGmat,1,&Ii,1,&idx[jj],&sv,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
    ierr = MatRestoreRow(Gmat,Ii,&ncols,&idx,&vals);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (symm) {
    ierr = MatSetOption(tGmat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  } else {
    ierr = MatPropagateSymmetryOptions(Gmat,tGmat);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET16],0,0,0,0);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  {
    double t1 = (!nnz0) ? 1. : 100.*(double)nnz1/(double)nnz0, t2 = (!nloc) ? 1. : (double)nnz0/(double)nloc;
    ierr = PetscInfo(*a_Gmat,"\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%D)\n",t1,vfilter,t2,MM);CHKERRQ(ierr);
  }
#endif
  ierr    = MatDestroy(&Gmat);CHKERRQ(ierr);
  *a_Gmat = tGmat;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   PCGAMGGetDataWithGhosts - reaches into the MPIAIJ internals of Gmat, so the communicator size must be > 1

   Input Parameter:
   . Gmat - MPIAIJ matrix for scatters
   . data_sz - number of data terms per node (# cols in output)
   . data_in[nloc*data_sz] - column-oriented data
   Output Parameter:
   . a_stride - number of rows of output
   . a_data_out[stride*data_sz] - output data with ghosts
*/
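/*
   Layout sketch (inferred from the copy loops below): the output is column
   oriented with stride nnodes = nloc + num_ghosts, local values first and
   ghost values appended, i.e. for column dir:

     a_data_out[dir*nnodes + kk] = data_in[dir*nloc + kk]   for kk < nloc
     a_data_out[dir*nnodes + kk] = ghosted value            for kk >= nloc

   The array is allocated here with PetscMalloc1 and ownership passes to the caller.
*/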
PetscErrorCode PCGAMGGetDataWithGhosts(Mat Gmat,PetscInt data_sz,PetscReal data_in[],PetscInt *a_stride,PetscReal **a_data_out)
{
  PetscErrorCode ierr;
  Vec            tmp_crds;
  Mat_MPIAIJ     *mpimat = (Mat_MPIAIJ*)Gmat->data;
  PetscInt       nnodes,num_ghosts,dir,kk,jj,my0,Iend,nloc;
  PetscScalar    *data_arr;
  PetscReal      *datas;
  PetscBool      isMPIAIJ;

  PetscFunctionBegin;
  ierr      = PetscObjectBaseTypeCompare((PetscObject)Gmat, MATMPIAIJ, &isMPIAIJ);CHKERRQ(ierr);
  ierr      = MatGetOwnershipRange(Gmat, &my0, &Iend);CHKERRQ(ierr);
  nloc      = Iend - my0;
  ierr      = VecGetLocalSize(mpimat->lvec, &num_ghosts);CHKERRQ(ierr);
  nnodes    = num_ghosts + nloc;
  *a_stride = nnodes;
  ierr      = MatCreateVecs(Gmat, &tmp_crds, NULL);CHKERRQ(ierr);

  ierr = PetscMalloc1(data_sz*nnodes, &datas);CHKERRQ(ierr);
  for (dir=0; dir<data_sz; dir++) {
    /* set local, and global */
    for (kk=0; kk<nloc; kk++) {
      PetscInt    gid = my0 + kk;
      PetscScalar crd = (PetscScalar)data_in[dir*nloc + kk]; /* col oriented */
      datas[dir*nnodes + kk] = PetscRealPart(crd);

      ierr = VecSetValues(tmp_crds, 1, &gid, &crd, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = VecAssemblyBegin(tmp_crds);CHKERRQ(ierr);
    ierr = VecAssemblyEnd(tmp_crds);CHKERRQ(ierr);
    /* get ghost data */
    ierr = VecScatterBegin(mpimat->Mvctx,tmp_crds,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mpimat->Mvctx,tmp_crds,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mpimat->lvec, &data_arr);CHKERRQ(ierr);
    for (kk=nloc,jj=0;jj<num_ghosts;kk++,jj++) datas[dir*nnodes + kk] = PetscRealPart(data_arr[jj]);
    ierr = VecRestoreArray(mpimat->lvec, &data_arr);CHKERRQ(ierr);
  }
  ierr        = VecDestroy(&tmp_crds);CHKERRQ(ierr);
  *a_data_out = datas;
  PetscFunctionReturn(0);
}

PetscErrorCode PCGAMGHashTableCreate(PetscInt a_size, PCGAMGHashTable *a_tab)
{
  PetscErrorCode ierr;
  PetscInt       kk;

  PetscFunctionBegin;
  a_tab->size = a_size;
  ierr = PetscMalloc2(a_size, &a_tab->table,a_size, &a_tab->data);CHKERRQ(ierr);
  for (kk=0; kk<a_size; kk++) a_tab->table[kk] = -1;
  PetscFunctionReturn(0);
}

PetscErrorCode PCGAMGHashTableDestroy(PCGAMGHashTable *a_tab)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree2(a_tab->table,a_tab->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode PCGAMGHashTableAdd(PCGAMGHashTable *a_tab, PetscInt a_key, PetscInt a_data)
{
  PetscInt kk,idx;

  PetscFunctionBegin;
  PetscCheckFalse(a_key<0,PETSC_COMM_SELF,PETSC_ERR_USER,"Negative key %D.",a_key);
  for (kk = 0, idx = GAMG_HASH(a_key); kk < a_tab->size; kk++, idx = (idx==(a_tab->size-1)) ? 0 : idx + 1) {
    if (a_tab->table[idx] == a_key) {
      /* exists */
      a_tab->data[idx] = a_data;
      break;
    } else if (a_tab->table[idx] == -1) {
      /* add */
      a_tab->table[idx] = a_key;
      a_tab->data[idx]  = a_data;
      break;
    }
  }
  if (kk==a_tab->size) {
    /* this is not very efficient: we wait until the table is completely full before resizing */
    PetscInt       oldsize = a_tab->size, new_size = 2*a_tab->size + 5, *oldtable = a_tab->table, *olddata = a_tab->data;
    PetscErrorCode ierr;

    a_tab->size = new_size;
    ierr = PetscMalloc2(a_tab->size, &a_tab->table,a_tab->size, &a_tab->data);CHKERRQ(ierr);
    for (kk=0;kk<a_tab->size;kk++) a_tab->table[kk] = -1;
    for (kk=0;kk<oldsize;kk++) {
      if (oldtable[kk] != -1) {
        ierr = PCGAMGHashTableAdd(a_tab, oldtable[kk], olddata[kk]);CHKERRQ(ierr);
      }
    }
    ierr = PetscFree2(oldtable,olddata);CHKERRQ(ierr);
    ierr = PCGAMGHashTableAdd(a_tab, a_key, a_data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
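
/*
   A minimal usage sketch for the hash table (PCGAMGHashTableFind is assumed to
   be the matching lookup routine declared alongside GAMG_HASH in gamg.h):

     PCGAMGHashTable tab;
     PetscInt        val;
     ierr = PCGAMGHashTableCreate(100,&tab);CHKERRQ(ierr);
     ierr = PCGAMGHashTableAdd(&tab,42,7);CHKERRQ(ierr);     // key 42 -> data 7
     ierr = PCGAMGHashTableFind(&tab,42,&val);CHKERRQ(ierr); // val == 7
     ierr = PCGAMGHashTableDestroy(&tab);CHKERRQ(ierr);
*/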
531