xref: /petsc/src/ksp/pc/impls/gamg/util.c (revision 2e65eb737b5b07432530db55f6b2a145ebc548b2)
1 /*
 GAMG geometric-algebraic multigrid PC - Mark Adams 2011
3  */
4 #include <petsc/private/matimpl.h>
5 #include <../src/ksp/pc/impls/gamg/gamg.h>           /*I "petscpc.h" I*/
6 #include <petsc/private/kspimpl.h>
7 
8 /*
9    Produces a set of block column indices of the matrix row, one for each block represented in the original row
10 
11    n - the number of block indices in cc[]
12    cc - the block indices (must be large enough to contain the indices)
13 */
14 static inline PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc)
15 {
16   PetscInt       cnt = -1,nidx,j;
17   const PetscInt *idx;
18 
19   PetscFunctionBegin;
20   PetscCall(MatGetRow(Amat,row,&nidx,&idx,NULL));
21   if (nidx) {
22     cnt = 0;
23     cc[cnt] = idx[0]/bs;
24     for (j=1; j<nidx; j++) {
25       if (cc[cnt] < idx[j]/bs) cc[++cnt] = idx[j]/bs;
26     }
27   }
28   PetscCall(MatRestoreRow(Amat,row,&nidx,&idx,NULL));
29   *n = cnt+1;
30   PetscFunctionReturn(0);
31 }
32 
33 /*
34     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
35 
36     ncollapsed - the number of block indices
37     collapsed - the block indices (must be large enough to contain the indices)
38 */
39 static inline PetscErrorCode MatCollapseRows(Mat Amat,PetscInt start,PetscInt bs,PetscInt *w0,PetscInt *w1,PetscInt *w2,PetscInt *ncollapsed,PetscInt **collapsed)
40 {
41   PetscInt       i,nprev,*cprev = w0,ncur = 0,*ccur = w1,*merged = w2,*cprevtmp;
42 
43   PetscFunctionBegin;
44   PetscCall(MatCollapseRow(Amat,start,bs,&nprev,cprev));
45   for (i=start+1; i<start+bs; i++) {
46     PetscCall(MatCollapseRow(Amat,i,bs,&ncur,ccur));
47     PetscCall(PetscMergeIntArray(nprev,cprev,ncur,ccur,&nprev,&merged));
48     cprevtmp = cprev; cprev = merged; merged = cprevtmp;
49   }
50   *ncollapsed = nprev;
51   if (collapsed) *collapsed  = cprev;
52   PetscFunctionReturn(0);
53 }
54 
55 /* -------------------------------------------------------------------------- */
56 /*
57    PCGAMGCreateGraph - create simple scaled scalar graph from matrix
58 
59  Input Parameter:
60  . Amat - matrix
61  Output Parameter:
 . a_Gmat - output scalar graph (symmetric?)
63  */
PetscErrorCode PCGAMGCreateGraph(Mat Amat, Mat *a_Gmat)
{
  PetscInt       Istart,Iend,Ii,jj,kk,ncols,nloc,NN,MM,bs;
  MPI_Comm       comm;
  Mat            Gmat;
  PetscBool      ismpiaij,isseqaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
  PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
  PetscCall(MatGetSize(Amat, &MM, &NN));
  PetscCall(MatGetBlockSize(Amat, &bs));
  nloc = (Iend-Istart)/bs; /* local number of block rows = local rows of the scalar graph */

  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij));
  PetscCheck(isseqaij || ismpiaij,PETSC_COMM_WORLD,PETSC_ERR_USER,"Require (MPI)AIJ matrix type");

  /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
  /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
     implementation */
  PetscCall(MatViewFromOptions(Amat, NULL, "-g_mat_view"));
  /* Fast path: bs>1 and the matrix looks assembled (garray exists for MPIAIJ).
     It assumes every bs x bs block is fully dense; if that assumption fails we
     jump (goto old_bs) to the general path below. */
  if (bs > 1 && (isseqaij || ((Mat_MPIAIJ*)Amat->data)->garray)) {
    PetscInt  *d_nnz, *o_nnz;
    Mat       a, b, c;
    MatScalar *aa,val,AA[4096]; /* fixed-size row buffers; guarded by the nmax<4096 check below */
    PetscInt  *aj,*ai,AJ[4096],nc;
    PetscCall(PetscInfo(Amat,"New bs>1 PCGAMGCreateGraph. nloc=%" PetscInt_FMT "\n",nloc));
    if (isseqaij) {
      a = Amat; b = NULL;
    }
    else {
      /* MPIAIJ: process the diagonal (A) and off-diagonal (B) sequential parts separately */
      Mat_MPIAIJ *d = (Mat_MPIAIJ*)Amat->data;
      a = d->A; b = d->B;
    }
    PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
    /* First pass: count block nonzeros per block row and verify every block is dense
       (each scalar row in a block has the same number of columns, a multiple of bs,
       starting at the same first column) */
    for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
      PetscInt       *nnz = (c==a) ? d_nnz : o_nnz, nmax=0;
      const PetscInt *cols;
      for (PetscInt brow=0,jj,ok=1,j0; brow < nloc*bs; brow += bs) { // block rows
        PetscCall(MatGetRow(c,brow,&jj,&cols,NULL));
        nnz[brow/bs] = jj/bs;
        if (jj%bs) ok = 0;
        if (cols) j0 = cols[0];
        else j0 = -1;
        PetscCall(MatRestoreRow(c,brow,&jj,&cols,NULL));
        if (nnz[brow/bs]>nmax) nmax = nnz[brow/bs];
        for (PetscInt ii=1; ii < bs && nnz[brow/bs] ; ii++) { // check for non-dense blocks
          PetscCall(MatGetRow(c,brow+ii,&jj,&cols,NULL));
          if (jj%bs) ok = 0;
          if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0;
          if (nnz[brow/bs] != jj/bs) ok = 0;
          PetscCall(MatRestoreRow(c,brow+ii,&jj,&cols,NULL));
        }
        if (!ok) {
          /* non-dense block found: free and fall back to the general (old) path */
          PetscCall(PetscFree2(d_nnz,o_nnz));
          goto old_bs;
        }
      }
      PetscCheck(nmax<4096,PETSC_COMM_SELF,PETSC_ERR_USER,"Buffer %" PetscInt_FMT " too small 4096.",nmax);
    }
    PetscCall(MatCreate(comm, &Gmat));
    PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(Gmat, 1, 1));
    PetscCall(MatSetType(Gmat, MATAIJ));
    /* both are called; only the one matching Gmat's actual (Seq/MPI) type takes effect */
    PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
    PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
    PetscCall(PetscFree2(d_nnz,o_nnz));
    // diag
    /* Walk the SeqAIJ CSR arrays directly; valid because the pass above proved
       each block row has identical, dense block structure in every scalar row */
    for (PetscInt brow=0,n,grow; brow < nloc*bs; brow += bs) { // block rows
      Mat_SeqAIJ *aseq  = (Mat_SeqAIJ*)a->data;
      ai = aseq->i;
      n  = ai[brow+1] - ai[brow];
      aj = aseq->j + ai[brow];
      for (int k=0; k<n; k += bs) { // block columns
        AJ[k/bs] = aj[k]/bs + Istart/bs; // diag starts at (Istart,Istart)
        val = 0;
        for (int ii=0; ii<bs; ii++) { // rows in block
          aa = aseq->a + ai[brow+ii] + k;
          for (int jj=0; jj<bs; jj++) { // columns in block
            val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
          }
        }
        AA[k/bs] = val;
      }
      grow = Istart/bs + brow/bs;
      PetscCall(MatSetValues(Gmat,1,&grow,n/bs,AJ,AA,INSERT_VALUES));
    }
    // off-diag
    if (ismpiaij) {
      Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)Amat->data;
      const PetscScalar *vals;
      const PetscInt    *cols, *garray = aij->garray;
      PetscCheck(garray,PETSC_COMM_SELF,PETSC_ERR_USER,"No garray ?");
      for (PetscInt brow=0,grow; brow < nloc*bs; brow += bs) { // block rows
        /* first row of the block gives the block-column structure; garray maps
           B's compact local columns to global column indices */
        PetscCall(MatGetRow(b,brow,&ncols,&cols,NULL));
        for (int k=0,cidx=0; k<ncols; k += bs,cidx++) {
          AA[k/bs] = 0;
          AJ[cidx] = garray[cols[k]]/bs;
        }
        nc = ncols/bs;
        PetscCall(MatRestoreRow(b,brow,&ncols,&cols,NULL));
        /* accumulate |a_ij| over all bs rows of the block */
        for (int ii=0; ii<bs; ii++) { // rows in block
          PetscCall(MatGetRow(b,brow+ii,&ncols,&cols,&vals));
          for (int k=0; k<ncols; k += bs) {
            for (int jj=0; jj<bs; jj++) { // cols in block
              AA[k/bs] += PetscAbs(PetscRealPart(vals[k+jj]));
            }
          }
          PetscCall(MatRestoreRow(b,brow+ii,&ncols,&cols,&vals));
        }
        grow = Istart/bs + brow/bs;
        PetscCall(MatSetValues(Gmat,1,&grow,nc,AJ,AA,INSERT_VALUES));
      }
    }
    PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
    PetscCall(MatViewFromOptions(Gmat, NULL, "-g_mat_view"));
  } else if (bs > 1) {
    const PetscScalar *vals;
    const PetscInt    *idx;
    PetscInt          *d_nnz, *o_nnz,*w0,*w1,*w2;

/* entered either by falling through (bs>1, general case) or by the goto above
   when the fast path detects non-dense blocks */
old_bs:
    /*
       Determine the preallocation needed for the scalar matrix derived from the vector matrix.
    */

    PetscCall(PetscInfo(Amat,"OLD bs>1 PCGAMGCreateGraph\n"));
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij));
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij));
    PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));

    if (isseqaij) {
      PetscInt max_d_nnz;

      /*
          Determine exact preallocation count for (sequential) scalar matrix
      */
      PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz));
      max_d_nnz = PetscMin(nloc,bs*max_d_nnz); /* workspace bound for MatCollapseRows */
      PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
      for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) {
        PetscCall(MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
      }
      PetscCall(PetscFree3(w0,w1,w2));

    } else if (ismpiaij) {
      Mat            Daij,Oaij;
      const PetscInt *garray;
      PetscInt       max_d_nnz;

      PetscCall(MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray));

      /*
          Determine exact preallocation count for diagonal block portion of scalar matrix
      */
      PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz));
      max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
      PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
      for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
        PetscCall(MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
      }
      PetscCall(PetscFree3(w0,w1,w2));

      /*
         Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
      */
      for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
        o_nnz[jj] = 0;
        for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */
          PetscCall(MatGetRow(Oaij,Ii+kk,&ncols,NULL,NULL));
          o_nnz[jj] += ncols;
          PetscCall(MatRestoreRow(Oaij,Ii+kk,&ncols,NULL,NULL));
        }
        /* cap at the number of off-process block columns */
        if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc;
      }

    } else SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_USER,"Require AIJ matrix type");

    /* get scalar copy (norms) of matrix */
    PetscCall(MatCreate(comm, &Gmat));
    PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(Gmat, 1, 1));
    PetscCall(MatSetType(Gmat, MATAIJ));
    PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
    PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
    PetscCall(PetscFree2(d_nnz,o_nnz));

    /* ADD_VALUES accumulates |a_ij| from all bs*bs scalar entries into each graph entry */
    for (Ii = Istart; Ii < Iend; Ii++) {
      PetscInt dest_row = Ii/bs;
      PetscCall(MatGetRow(Amat,Ii,&ncols,&idx,&vals));
      for (jj=0; jj<ncols; jj++) {
        PetscInt    dest_col = idx[jj]/bs;
        PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
        PetscCall(MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES));
      }
      PetscCall(MatRestoreRow(Amat,Ii,&ncols,&idx,&vals));
    }
    PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
    PetscCall(MatViewFromOptions(Gmat, NULL, "-g_mat_view"));
  } else {
    /* just copy scalar matrix - abs() not taken here but scaled later */
    PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
  }
  PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));

  *a_Gmat = Gmat;
  PetscFunctionReturn(0);
}
275 
276 /* -------------------------------------------------------------------------- */
277 /*@C
278    PCGAMGFilterGraph - filter (remove zero and possibly small values from the) graph and make it symmetric if requested
279 
280    Collective on Mat
281 
282    Input Parameters:
283 +   a_Gmat - the graph
284 .   vfilter - threshold parameter [0,1)
285 -   symm - make the result symmetric
286 
287    Level: developer
288 
289    Notes:
    This is called before graph coarseners are called.
291 
292 .seealso: PCGAMGSetThreshold()
293 @*/
PetscErrorCode PCGAMGFilterGraph(Mat *a_Gmat,PetscReal vfilter,PetscBool symm)
{
  PetscInt          Istart,Iend,Ii,jj,ncols,nnz0,nnz1, NN, MM, nloc;
  PetscMPIInt       rank;
  Mat               Gmat  = *a_Gmat, tGmat;
  MPI_Comm          comm;
  const PetscScalar *vals;
  const PetscInt    *idx;
  PetscInt          *d_nnz, *o_nnz;
  Vec               diag;

  PetscFunctionBegin;
  /* TODO GPU: optimization proposal, each class provides fast implementation of this
     procedure via MatAbs API */
  /* Fast path: no filtering and no symmetrization requested -- just take |a_ij|
     in place on the existing matrix and return it unchanged otherwise */
  if (vfilter < 0.0 && !symm) {
    /* Just use the provided matrix as the graph but make all values positive */
    MatInfo     info;
    PetscScalar *avals;
    PetscBool isaij,ismpiaij;
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATSEQAIJ,&isaij));
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATMPIAIJ,&ismpiaij));
    PetscCheck(isaij || ismpiaij,PETSC_COMM_WORLD,PETSC_ERR_USER,"Require (MPI)AIJ matrix type");
    if (isaij) {
      PetscCall(MatGetInfo(Gmat,MAT_LOCAL,&info));
      PetscCall(MatSeqAIJGetArray(Gmat,&avals));
      for (jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
      PetscCall(MatSeqAIJRestoreArray(Gmat,&avals));
    } else {
      /* MPIAIJ: apply abs() to the diagonal (A) and off-diagonal (B) parts separately */
      Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)Gmat->data;
      PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info));
      PetscCall(MatSeqAIJGetArray(aij->A,&avals));
      for (jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
      PetscCall(MatSeqAIJRestoreArray(aij->A,&avals));
      PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info));
      PetscCall(MatSeqAIJGetArray(aij->B,&avals));
      for (jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
      PetscCall(MatSeqAIJRestoreArray(aij->B,&avals));
    }
    PetscFunctionReturn(0);
  }

  /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
               Also, if the matrix is symmetric, can we skip this
               operation? It can be very expensive on large matrices. */
  PetscCall(PetscObjectGetComm((PetscObject)Gmat,&comm));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
  nloc = Iend - Istart;
  PetscCall(MatGetSize(Gmat, &MM, &NN));

  /* symmetrize: Gmat += Gmat^T */
  if (symm) {
    Mat matTrans;
    PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
    PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
    PetscCall(MatDestroy(&matTrans));
  }

  /* scale Gmat for all values between -1 and 1 */
  /* i.e. G <- D^{-1/2} G D^{-1/2} with D = diag(G).
     NOTE(review): a zero diagonal entry would produce inf/nan here -- assumes a
     nonzero diagonal; confirm with callers */
  PetscCall(MatCreateVecs(Gmat, &diag, NULL));
  PetscCall(MatGetDiagonal(Gmat, diag));
  PetscCall(VecReciprocal(diag));
  PetscCall(VecSqrtAbs(diag));
  PetscCall(MatDiagonalScale(Gmat, diag, diag));
  PetscCall(VecDestroy(&diag));

  /* Determine upper bound on nonzeros needed in new filtered matrix */
  PetscCall(PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz));
  for (Ii = Istart, jj = 0; Ii < Iend; Ii++, jj++) {
    PetscCall(MatGetRow(Gmat,Ii,&ncols,NULL,NULL));
    d_nnz[jj] = ncols; /* over-estimate: full row count for both diag and off-diag parts */
    o_nnz[jj] = ncols;
    PetscCall(MatRestoreRow(Gmat,Ii,&ncols,NULL,NULL));
    if (d_nnz[jj] > nloc) d_nnz[jj] = nloc;
    if (o_nnz[jj] > (MM-nloc)) o_nnz[jj] = MM - nloc;
  }
  PetscCall(MatCreate(comm, &tGmat));
  PetscCall(MatSetSizes(tGmat,nloc,nloc,MM,MM));
  PetscCall(MatSetBlockSizes(tGmat, 1, 1));
  PetscCall(MatSetType(tGmat, MATAIJ));
  /* both preallocation calls made; only the one matching tGmat's actual type takes effect */
  PetscCall(MatSeqAIJSetPreallocation(tGmat,0,d_nnz));
  PetscCall(MatMPIAIJSetPreallocation(tGmat,0,d_nnz,0,o_nnz));
  PetscCall(MatSetOption(tGmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  PetscCall(PetscFree2(d_nnz,o_nnz));

  /* copy entries above the threshold; nnz0/nnz1 count entries before/after filtering */
  for (Ii = Istart, nnz0 = nnz1 = 0; Ii < Iend; Ii++) {
    PetscCall(MatGetRow(Gmat,Ii,&ncols,&idx,&vals));
    for (jj=0; jj<ncols; jj++,nnz0++) {
      PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
      if (PetscRealPart(sv) > vfilter) {
        nnz1++;
        PetscCall(MatSetValues(tGmat,1,&Ii,1,&idx[jj],&sv,INSERT_VALUES));
      }
    }
    PetscCall(MatRestoreRow(Gmat,Ii,&ncols,&idx,&vals));
  }
  PetscCall(MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY));
  if (symm) {
    PetscCall(MatSetOption(tGmat,MAT_SYMMETRIC,PETSC_TRUE));
  } else {
    PetscCall(MatPropagateSymmetryOptions(Gmat,tGmat));
  }

#if defined(PETSC_USE_INFO)
  {
    double t1 = (!nnz0) ? 1. : 100.*(double)nnz1/(double)nnz0, t2 = (!nloc) ? 1. : (double)nnz0/(double)nloc;
    PetscCall(PetscInfo(*a_Gmat,"\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ")\n",t1,(double)vfilter,t2,MM));
  }
#endif
  /* the filtered matrix replaces the input graph, which is destroyed here */
  PetscCall(MatDestroy(&Gmat));
  *a_Gmat = tGmat;
  PetscFunctionReturn(0);
}
407 
408 /* -------------------------------------------------------------------------- */
409 /*
410    PCGAMGGetDataWithGhosts - hacks into Mat MPIAIJ so this must have size > 1
411 
412    Input Parameter:
   . Gmat - MPIAIJ matrix for scatters
414    . data_sz - number of data terms per node (# cols in output)
415    . data_in[nloc*data_sz] - column oriented data
416    Output Parameter:
   . a_stride - number of rows of output
418    . a_data_out[stride*data_sz] - output data with ghosts
419 */
420 PetscErrorCode PCGAMGGetDataWithGhosts(Mat Gmat,PetscInt data_sz,PetscReal data_in[],PetscInt *a_stride,PetscReal **a_data_out)
421 {
422   Vec            tmp_crds;
423   Mat_MPIAIJ     *mpimat = (Mat_MPIAIJ*)Gmat->data;
424   PetscInt       nnodes,num_ghosts,dir,kk,jj,my0,Iend,nloc;
425   PetscScalar    *data_arr;
426   PetscReal      *datas;
427   PetscBool      isMPIAIJ;
428 
429   PetscFunctionBegin;
430   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATMPIAIJ, &isMPIAIJ));
431   PetscCall(MatGetOwnershipRange(Gmat, &my0, &Iend));
432   nloc      = Iend - my0;
433   PetscCall(VecGetLocalSize(mpimat->lvec, &num_ghosts));
434   nnodes    = num_ghosts + nloc;
435   *a_stride = nnodes;
436   PetscCall(MatCreateVecs(Gmat, &tmp_crds, NULL));
437 
438   PetscCall(PetscMalloc1(data_sz*nnodes, &datas));
439   for (dir=0; dir<data_sz; dir++) {
440     /* set local, and global */
441     for (kk=0; kk<nloc; kk++) {
442       PetscInt    gid = my0 + kk;
443       PetscScalar crd = (PetscScalar)data_in[dir*nloc + kk]; /* col oriented */
444       datas[dir*nnodes + kk] = PetscRealPart(crd);
445 
446       PetscCall(VecSetValues(tmp_crds, 1, &gid, &crd, INSERT_VALUES));
447     }
448     PetscCall(VecAssemblyBegin(tmp_crds));
449     PetscCall(VecAssemblyEnd(tmp_crds));
450     /* get ghost datas */
451     PetscCall(VecScatterBegin(mpimat->Mvctx,tmp_crds,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD));
452     PetscCall(VecScatterEnd(mpimat->Mvctx,tmp_crds,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD));
453     PetscCall(VecGetArray(mpimat->lvec, &data_arr));
454     for (kk=nloc,jj=0;jj<num_ghosts;kk++,jj++) datas[dir*nnodes + kk] = PetscRealPart(data_arr[jj]);
455     PetscCall(VecRestoreArray(mpimat->lvec, &data_arr));
456   }
457   PetscCall(VecDestroy(&tmp_crds));
458   *a_data_out = datas;
459   PetscFunctionReturn(0);
460 }
461 
462 PetscErrorCode PCGAMGHashTableCreate(PetscInt a_size, PCGAMGHashTable *a_tab)
463 {
464   PetscInt       kk;
465 
466   PetscFunctionBegin;
467   a_tab->size = a_size;
468   PetscCall(PetscMalloc2(a_size, &a_tab->table,a_size, &a_tab->data));
469   for (kk=0; kk<a_size; kk++) a_tab->table[kk] = -1;
470   PetscFunctionReturn(0);
471 }
472 
/* Free the hash table's key and data arrays (allocated together in PCGAMGHashTableCreate);
   the PCGAMGHashTable struct itself is owned by the caller */
PetscErrorCode PCGAMGHashTableDestroy(PCGAMGHashTable *a_tab)
{
  PetscFunctionBegin;
  PetscCall(PetscFree2(a_tab->table,a_tab->data));
  PetscFunctionReturn(0);
}
479 
480 PetscErrorCode PCGAMGHashTableAdd(PCGAMGHashTable *a_tab, PetscInt a_key, PetscInt a_data)
481 {
482   PetscInt kk,idx;
483 
484   PetscFunctionBegin;
485   PetscCheck(a_key>=0,PETSC_COMM_SELF,PETSC_ERR_USER,"Negative key %" PetscInt_FMT ".",a_key);
486   for (kk = 0, idx = GAMG_HASH(a_key); kk < a_tab->size; kk++, idx = (idx==(a_tab->size-1)) ? 0 : idx + 1) {
487     if (a_tab->table[idx] == a_key) {
488       /* exists */
489       a_tab->data[idx] = a_data;
490       break;
491     } else if (a_tab->table[idx] == -1) {
492       /* add */
493       a_tab->table[idx] = a_key;
494       a_tab->data[idx]  = a_data;
495       break;
496     }
497   }
498   if (kk==a_tab->size) {
499     /* this is not to efficient, waiting until completely full */
500     PetscInt       oldsize = a_tab->size, new_size = 2*a_tab->size + 5, *oldtable = a_tab->table, *olddata = a_tab->data;
501 
502     a_tab->size = new_size;
503     PetscCall(PetscMalloc2(a_tab->size, &a_tab->table,a_tab->size, &a_tab->data));
504     for (kk=0;kk<a_tab->size;kk++) a_tab->table[kk] = -1;
505     for (kk=0;kk<oldsize;kk++) {
506       if (oldtable[kk] != -1) {
507         PetscCall(PCGAMGHashTableAdd(a_tab, oldtable[kk], olddata[kk]));
508        }
509     }
510     PetscCall(PetscFree2(oldtable,olddata));
511     PetscCall(PCGAMGHashTableAdd(a_tab, a_key, a_data));
512   }
513   PetscFunctionReturn(0);
514 }
515