xref: /petsc/src/ksp/pc/impls/gamg/util.c (revision 40cbb1a031ea8f2be4fe2b92dc842b003ad37be3)
1 /*
2  GAMG geometric-algebric multigrid PC - Mark Adams 2011
3  */
4 #include <petsc/private/matimpl.h>
5 #include <../src/ksp/pc/impls/gamg/gamg.h>           /*I "petscpc.h" I*/
6 #include <petsc/private/kspimpl.h>
7 
8 /*
9    Produces a set of block column indices of the matrix row, one for each block represented in the original row
10 
11    n - the number of block indices in cc[]
12    cc - the block indices (must be large enough to contain the indices)
13 */
14 static inline PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc)
15 {
16   PetscInt       cnt = -1,nidx,j;
17   const PetscInt *idx;
18 
19   PetscFunctionBegin;
20   PetscCall(MatGetRow(Amat,row,&nidx,&idx,NULL));
21   if (nidx) {
22     cnt = 0;
23     cc[cnt] = idx[0]/bs;
24     for (j=1; j<nidx; j++) {
25       if (cc[cnt] < idx[j]/bs) cc[++cnt] = idx[j]/bs;
26     }
27   }
28   PetscCall(MatRestoreRow(Amat,row,&nidx,&idx,NULL));
29   *n = cnt+1;
30   PetscFunctionReturn(0);
31 }
32 
33 /*
34     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
35 
36     ncollapsed - the number of block indices
37     collapsed - the block indices (must be large enough to contain the indices)
38 */
39 static inline PetscErrorCode MatCollapseRows(Mat Amat,PetscInt start,PetscInt bs,PetscInt *w0,PetscInt *w1,PetscInt *w2,PetscInt *ncollapsed,PetscInt **collapsed)
40 {
41   PetscInt       i,nprev,*cprev = w0,ncur = 0,*ccur = w1,*merged = w2,*cprevtmp;
42 
43   PetscFunctionBegin;
44   PetscCall(MatCollapseRow(Amat,start,bs,&nprev,cprev));
45   for (i=start+1; i<start+bs; i++) {
46     PetscCall(MatCollapseRow(Amat,i,bs,&ncur,ccur));
47     PetscCall(PetscMergeIntArray(nprev,cprev,ncur,ccur,&nprev,&merged));
48     cprevtmp = cprev; cprev = merged; merged = cprevtmp;
49   }
50   *ncollapsed = nprev;
51   if (collapsed) *collapsed  = cprev;
52   PetscFunctionReturn(0);
53 }
54 
55 /* -------------------------------------------------------------------------- */
56 /*
57    PCGAMGCreateGraph - create simple scaled scalar graph from matrix
58 
59  Input Parameter:
60  . Amat - matrix
61  - symm - make the result symmetric
62 
63  Output Parameter:
64  . a_Gmat - output scalar graph >= 0
65 
66  */
67 PetscErrorCode PCGAMGCreateGraph(Mat Amat, Mat *a_Gmat, PetscBool symm)
68 {
69   PetscInt       Istart,Iend,Ii,jj,kk,ncols,nloc,NN,MM,bs;
70   MPI_Comm       comm;
71   Mat            Gmat;
72   PetscBool      ismpiaij,isseqaij;
73   Mat            a, b, c;
74 
75   PetscFunctionBegin;
76   PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
77   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
78   PetscCall(MatGetSize(Amat, &MM, &NN));
79   PetscCall(MatGetBlockSize(Amat, &bs));
80   nloc = (Iend-Istart)/bs;
81 
82   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij));
83   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij));
84   PetscCheck(isseqaij || ismpiaij,comm,PETSC_ERR_USER,"Require (MPI)AIJ matrix type");
85 
86   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
87   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
88      implementation */
89   PetscCall(MatViewFromOptions(Amat, NULL, "-g_mat_view"));
90   if (bs > 1 && (isseqaij || ((Mat_MPIAIJ*)Amat->data)->garray)) {
91     PetscInt  *d_nnz, *o_nnz;
92     MatScalar *aa,val,AA[4096];
93     PetscInt  *aj,*ai,AJ[4096],nc;
94     if (isseqaij) { a = Amat; b = NULL; }
95     else {
96       Mat_MPIAIJ *d = (Mat_MPIAIJ*)Amat->data;
97       a = d->A; b = d->B;
98     }
99     PetscCall(PetscInfo(Amat,"New bs>1 PCGAMGCreateGraph. nloc=%" PetscInt_FMT "\n",nloc));
100     PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
101     for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
102       PetscInt       *nnz = (c==a) ? d_nnz : o_nnz, nmax=0;
103       const PetscInt *cols;
104       for (PetscInt brow=0,jj,ok=1,j0; brow < nloc*bs; brow += bs) { // block rows
105         PetscCall(MatGetRow(c,brow,&jj,&cols,NULL));
106         nnz[brow/bs] = jj/bs;
107         if (jj%bs) ok = 0;
108         if (cols) j0 = cols[0];
109         else j0 = -1;
110         PetscCall(MatRestoreRow(c,brow,&jj,&cols,NULL));
111         if (nnz[brow/bs]>nmax) nmax = nnz[brow/bs];
112         for (PetscInt ii=1; ii < bs && nnz[brow/bs] ; ii++) { // check for non-dense blocks
113           PetscCall(MatGetRow(c,brow+ii,&jj,&cols,NULL));
114           if (jj%bs) ok = 0;
115           if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0;
116           if (nnz[brow/bs] != jj/bs) ok = 0;
117           PetscCall(MatRestoreRow(c,brow+ii,&jj,&cols,NULL));
118         }
119         if (!ok) {
120           PetscCall(PetscFree2(d_nnz,o_nnz));
121           goto old_bs;
122         }
123       }
124       PetscCheck(nmax<4096,PETSC_COMM_SELF,PETSC_ERR_USER,"Buffer %" PetscInt_FMT " too small 4096.",nmax);
125     }
126     PetscCall(MatCreate(comm, &Gmat));
127     PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE));
128     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
129     PetscCall(MatSetType(Gmat, MATAIJ));
130     PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
131     PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
132     PetscCall(PetscFree2(d_nnz,o_nnz));
133     // diag
134     for (PetscInt brow=0,n,grow; brow < nloc*bs; brow += bs) { // block rows
135       Mat_SeqAIJ *aseq  = (Mat_SeqAIJ*)a->data;
136       ai = aseq->i;
137       n  = ai[brow+1] - ai[brow];
138       aj = aseq->j + ai[brow];
139       for (int k=0; k<n; k += bs) { // block columns
140         AJ[k/bs] = aj[k]/bs + Istart/bs; // diag starts at (Istart,Istart)
141         val = 0;
142         for (int ii=0; ii<bs; ii++) { // rows in block
143           aa = aseq->a + ai[brow+ii] + k;
144           for (int jj=0; jj<bs; jj++) { // columns in block
145             val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
146           }
147         }
148         AA[k/bs] = val;
149       }
150       grow = Istart/bs + brow/bs;
151       PetscCall(MatSetValues(Gmat,1,&grow,n/bs,AJ,AA,INSERT_VALUES));
152     }
153     // off-diag
154     if (ismpiaij) {
155       Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)Amat->data;
156       const PetscScalar *vals;
157       const PetscInt    *cols, *garray = aij->garray;
158       PetscCheck(garray,PETSC_COMM_SELF,PETSC_ERR_USER,"No garray ?");
159       for (PetscInt brow=0,grow; brow < nloc*bs; brow += bs) { // block rows
160         PetscCall(MatGetRow(b,brow,&ncols,&cols,NULL));
161         for (int k=0,cidx=0 ; k < ncols ; k += bs, cidx++) {
162           AA[k/bs] = 0;
163           AJ[cidx] = garray[cols[k]]/bs;
164         }
165         nc = ncols/bs;
166         PetscCall(MatRestoreRow(b,brow,&ncols,&cols,NULL));
167         for (int ii=0; ii<bs; ii++) { // rows in block
168           PetscCall(MatGetRow(b,brow+ii,&ncols,&cols,&vals));
169           for (int k=0; k<ncols; k += bs) {
170             for (int jj=0; jj<bs; jj++) { // cols in block
171               AA[k/bs] += PetscAbs(PetscRealPart(vals[k+jj]));
172             }
173           }
174           PetscCall(MatRestoreRow(b,brow+ii,&ncols,&cols,&vals));
175         }
176         grow = Istart/bs + brow/bs;
177         PetscCall(MatSetValues(Gmat,1,&grow,nc,AJ,AA,INSERT_VALUES));
178       }
179     }
180     PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
181     PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
182     PetscCall(MatViewFromOptions(Gmat, NULL, "-g_mat_view"));
183   } else if (bs > 1) {
184     const PetscScalar *vals;
185     const PetscInt    *idx;
186     PetscInt          *d_nnz, *o_nnz,*w0,*w1,*w2;
187 
188 old_bs:
189     /*
190        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
191     */
192 
193     PetscCall(PetscInfo(Amat,"OLD bs>1 PCGAMGCreateGraph\n"));
194     PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
195 
196     if (isseqaij) {
197       PetscInt max_d_nnz;
198 
199       /*
200           Determine exact preallocation count for (sequential) scalar matrix
201       */
202       PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz));
203       max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
204       PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
205       for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) {
206         PetscCall(MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
207       }
208       PetscCall(PetscFree3(w0,w1,w2));
209 
210     } else if (ismpiaij) {
211       Mat            Daij,Oaij;
212       const PetscInt *garray;
213       PetscInt       max_d_nnz;
214 
215       PetscCall(MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray));
216 
217       /*
218           Determine exact preallocation count for diagonal block portion of scalar matrix
219       */
220       PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz));
221       max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
222       PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
223       for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
224         PetscCall(MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
225       }
226       PetscCall(PetscFree3(w0,w1,w2));
227 
228       /*
229          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
230       */
231       for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
232         o_nnz[jj] = 0;
233         for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */
234           PetscCall(MatGetRow(Oaij,Ii+kk,&ncols,NULL,NULL));
235           o_nnz[jj] += ncols;
236           PetscCall(MatRestoreRow(Oaij,Ii+kk,&ncols,NULL,NULL));
237         }
238         if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc;
239       }
240 
241     } else SETERRQ(comm,PETSC_ERR_USER,"Require AIJ matrix type");
242 
243     /* get scalar copy (norms) of matrix */
244     PetscCall(MatCreate(comm, &Gmat));
245     PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE));
246     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
247     PetscCall(MatSetType(Gmat, MATAIJ));
248     PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
249     PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
250     PetscCall(PetscFree2(d_nnz,o_nnz));
251 
252     for (Ii = Istart; Ii < Iend; Ii++) {
253       PetscInt dest_row = Ii/bs;
254       PetscCall(MatGetRow(Amat,Ii,&ncols,&idx,&vals));
255       for (jj=0; jj<ncols; jj++) {
256         PetscInt    dest_col = idx[jj]/bs;
257         PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
258         PetscCall(MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES));
259       }
260       PetscCall(MatRestoreRow(Amat,Ii,&ncols,&idx,&vals));
261     }
262     PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
263     PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
264     PetscCall(MatViewFromOptions(Gmat, NULL, "-g_mat_view"));
265   } else {
266     /* TODO GPU: optimization proposal, each class provides fast implementation of this
267      procedure via MatAbs API */
268     /* just copy scalar matrix & abs() */
269     PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
270     if (isseqaij) { a = Gmat; b = NULL; }
271     else {
272       Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data;
273       a = d->A; b = d->B;
274     }
275     /* abs */
276     for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
277       MatInfo     info;
278       PetscScalar *avals;
279       PetscCall(MatGetInfo(c,MAT_LOCAL,&info));
280       PetscCall(MatSeqAIJGetArray(c,&avals));
281       for (int jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
282       PetscCall(MatSeqAIJRestoreArray(c,&avals));
283     }
284   }
285   if (symm) {
286     Mat matTrans;
287     PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
288     PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
289     PetscCall(MatDestroy(&matTrans));
290   }
291   PetscCall(MatSetOption(Gmat,MAT_SYMMETRIC,PETSC_TRUE));
292   /* PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); -- a graph has to be symmetric and +. Normal Mat options are not relevant ? */
293 
294   *a_Gmat = Gmat;
295   PetscFunctionReturn(0);
296 }
297 
298 /* -------------------------------------------------------------------------- */
299 /*@C
300    PCGAMGFilterGraph - filter values with small absolute values (and make graph symmetric if requested).
301      With vfilter < 0 just return. The user needs to check if the matrix has not changed if they allow for vfilter < 0
302 
303    Collective on Mat
304 
305    Input Parameters:
306 +   a_Gmat - the graph
307 .   vfilter - threshold parameter [0,1)
308 
309  Output Parameter:
310  . a_Gmat - output filtered scalar graph
311 
312    Level: developer
313 
314    Notes:
315     This is called before graph coarsers are called.
316     This could go into Mat, move 'symm' to GAMG
317 
318 .seealso: `PCGAMGSetThreshold()`
319 @*/
320 PetscErrorCode PCGAMGFilterGraph(Mat *a_Gmat,PetscReal vfilter)
321 {
322   PetscInt          Istart,Iend,ncols,nnz0,nnz1, NN, MM, nloc;
323   Mat               Gmat  = *a_Gmat, tGmat;
324   MPI_Comm          comm;
325   const PetscScalar *vals;
326   const PetscInt    *idx;
327   PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, AJ[4096];
328   MatScalar         AA[4096]; // this is checked in graph
329   Vec               diag;
330   PetscBool         ismpiaij,isseqaij;
331   Mat               a, b, c;
332 
333   PetscFunctionBegin;
334   PetscCall(PetscObjectGetComm((PetscObject)Gmat,&comm));
335   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATSEQAIJ,&isseqaij));
336   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATMPIAIJ,&ismpiaij));
337 
338   /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
339                Also, if the matrix is symmetric, can we skip this
340                operation? It can be very expensive on large matrices. */
341   if (vfilter < 0.0) {
342     /* nothing else to do, just return */
343     PetscFunctionReturn(0);
344   }
345 
346   /* scale c for all values between -1 and 1 */
347   PetscCall(MatCreateVecs(Gmat, &diag, NULL));
348   PetscCall(MatGetDiagonal(Gmat, diag));
349   PetscCall(VecReciprocal(diag));
350   PetscCall(VecSqrtAbs(diag));
351   PetscCall(MatDiagonalScale(Gmat, diag, diag));
352 
353   // global sizes
354   PetscCall(MatGetSize(Gmat, &MM, &NN));
355   PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
356   nloc = Iend - Istart;
357   PetscCall(PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz));
358   if (isseqaij) { a = Gmat; b = NULL; }
359   else {
360     Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data;
361     a = d->A; b = d->B;
362     garray = d->garray;
363   }
364   /* Determine upper bound on non-zeros needed in new filtered matrix */
365   for (PetscInt row=0; row < nloc; row++) {
366     PetscCall(MatGetRow(a,row,&ncols,NULL,NULL));
367     d_nnz[row] = ncols;
368     PetscCall(MatRestoreRow(a,row,&ncols,NULL,NULL));
369   }
370   if (b) {
371     for (PetscInt row=0; row < nloc; row++) {
372       PetscCall(MatGetRow(b,row,&ncols,NULL,NULL));
373       o_nnz[row] = ncols;
374       PetscCall(MatRestoreRow(b,row,&ncols,NULL,NULL));
375     }
376   }
377   PetscCall(MatCreate(comm, &tGmat));
378   PetscCall(MatSetSizes(tGmat,nloc,nloc,MM,MM));
379   PetscCall(MatSetBlockSizes(tGmat, 1, 1));
380   PetscCall(MatSetType(tGmat, MATAIJ));
381   PetscCall(MatSeqAIJSetPreallocation(tGmat,0,d_nnz));
382   PetscCall(MatMPIAIJSetPreallocation(tGmat,0,d_nnz,0,o_nnz));
383   PetscCall(MatSetOption(tGmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
384   PetscCall(PetscFree2(d_nnz,o_nnz));
385   nnz0 = nnz1 = 0;
386   for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
387     for (PetscInt row=0, grow=Istart, ncol_row, jj ; row < nloc; row++,grow++) {
388       PetscCall(MatGetRow(c,row,&ncols,&idx,&vals));
389       PetscCheck(ncols<4096,PETSC_COMM_SELF,PETSC_ERR_USER,"Buffer, ncols = %" PetscInt_FMT ", too small 4096.",ncols);
390       for (ncol_row=jj=0; jj<ncols; jj++,nnz0++) {
391         PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
392         if (PetscRealPart(sv) > vfilter) {
393           nnz1++;
394           PetscInt cid = idx[jj] + Istart; //diag
395           if (c!=a) cid = garray[idx[jj]];
396           AA[ncol_row] = vals[jj];
397           AJ[ncol_row] = cid;
398           ncol_row++;
399         }
400       }
401       PetscCall(MatRestoreRow(c,row,&ncols,&idx,&vals));
402       PetscCall(MatSetValues(tGmat,1,&grow,ncol_row,AJ,AA,INSERT_VALUES));
403     }
404   }
405   PetscCall(MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY));
406   PetscCall(MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY));
407   PetscCall(MatPropagateSymmetryOptions(Gmat,tGmat)); /* Normal Mat options are not relevant ? */
408 
409   PetscCall(PetscInfo(tGmat,"\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ")\n",
410                       (!nnz0) ? 1. : 100.*(double)nnz1/(double)nnz0, (double)vfilter,
411                       (!nloc) ? 1. : (double)nnz0/(double)nloc,MM));
412 
413   PetscCall(MatDestroy(&Gmat));
414   PetscCall(VecDestroy(&diag));
415   *a_Gmat = tGmat;
416   PetscFunctionReturn(0);
417 }
418 
419 /* -------------------------------------------------------------------------- */
420 /*
421    PCGAMGGetDataWithGhosts - hacks into Mat MPIAIJ so this must have size > 1
422 
423    Input Parameter:
424    . Gmat - MPIAIJ matrix for scattters
425    . data_sz - number of data terms per node (# cols in output)
426    . data_in[nloc*data_sz] - column oriented data
427    Output Parameter:
428    . a_stride - numbrt of rows of output
429    . a_data_out[stride*data_sz] - output data with ghosts
430 */
431 PetscErrorCode PCGAMGGetDataWithGhosts(Mat Gmat,PetscInt data_sz,PetscReal data_in[],PetscInt *a_stride,PetscReal **a_data_out)
432 {
433   Vec            tmp_crds;
434   Mat_MPIAIJ     *mpimat = (Mat_MPIAIJ*)Gmat->data;
435   PetscInt       nnodes,num_ghosts,dir,kk,jj,my0,Iend,nloc;
436   PetscScalar    *data_arr;
437   PetscReal      *datas;
438   PetscBool      isMPIAIJ;
439 
440   PetscFunctionBegin;
441   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATMPIAIJ, &isMPIAIJ));
442   PetscCall(MatGetOwnershipRange(Gmat, &my0, &Iend));
443   nloc      = Iend - my0;
444   PetscCall(VecGetLocalSize(mpimat->lvec, &num_ghosts));
445   nnodes    = num_ghosts + nloc;
446   *a_stride = nnodes;
447   PetscCall(MatCreateVecs(Gmat, &tmp_crds, NULL));
448 
449   PetscCall(PetscMalloc1(data_sz*nnodes, &datas));
450   for (dir=0; dir<data_sz; dir++) {
451     /* set local, and global */
452     for (kk=0; kk<nloc; kk++) {
453       PetscInt    gid = my0 + kk;
454       PetscScalar crd = (PetscScalar)data_in[dir*nloc + kk]; /* col oriented */
455       datas[dir*nnodes + kk] = PetscRealPart(crd);
456 
457       PetscCall(VecSetValues(tmp_crds, 1, &gid, &crd, INSERT_VALUES));
458     }
459     PetscCall(VecAssemblyBegin(tmp_crds));
460     PetscCall(VecAssemblyEnd(tmp_crds));
461     /* get ghost datas */
462     PetscCall(VecScatterBegin(mpimat->Mvctx,tmp_crds,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD));
463     PetscCall(VecScatterEnd(mpimat->Mvctx,tmp_crds,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD));
464     PetscCall(VecGetArray(mpimat->lvec, &data_arr));
465     for (kk=nloc,jj=0;jj<num_ghosts;kk++,jj++) datas[dir*nnodes + kk] = PetscRealPart(data_arr[jj]);
466     PetscCall(VecRestoreArray(mpimat->lvec, &data_arr));
467   }
468   PetscCall(VecDestroy(&tmp_crds));
469   *a_data_out = datas;
470   PetscFunctionReturn(0);
471 }
472 
473 PetscErrorCode PCGAMGHashTableCreate(PetscInt a_size, PCGAMGHashTable *a_tab)
474 {
475   PetscInt       kk;
476 
477   PetscFunctionBegin;
478   a_tab->size = a_size;
479   PetscCall(PetscMalloc2(a_size, &a_tab->table,a_size, &a_tab->data));
480   for (kk=0; kk<a_size; kk++) a_tab->table[kk] = -1;
481   PetscFunctionReturn(0);
482 }
483 
484 PetscErrorCode PCGAMGHashTableDestroy(PCGAMGHashTable *a_tab)
485 {
486   PetscFunctionBegin;
487   PetscCall(PetscFree2(a_tab->table,a_tab->data));
488   PetscFunctionReturn(0);
489 }
490 
491 PetscErrorCode PCGAMGHashTableAdd(PCGAMGHashTable *a_tab, PetscInt a_key, PetscInt a_data)
492 {
493   PetscInt kk,idx;
494 
495   PetscFunctionBegin;
496   PetscCheck(a_key>=0,PETSC_COMM_SELF,PETSC_ERR_USER,"Negative key %" PetscInt_FMT ".",a_key);
497   for (kk = 0, idx = GAMG_HASH(a_key); kk < a_tab->size; kk++, idx = (idx==(a_tab->size-1)) ? 0 : idx + 1) {
498     if (a_tab->table[idx] == a_key) {
499       /* exists */
500       a_tab->data[idx] = a_data;
501       break;
502     } else if (a_tab->table[idx] == -1) {
503       /* add */
504       a_tab->table[idx] = a_key;
505       a_tab->data[idx]  = a_data;
506       break;
507     }
508   }
509   if (kk==a_tab->size) {
510     /* this is not to efficient, waiting until completely full */
511     PetscInt       oldsize = a_tab->size, new_size = 2*a_tab->size + 5, *oldtable = a_tab->table, *olddata = a_tab->data;
512 
513     a_tab->size = new_size;
514     PetscCall(PetscMalloc2(a_tab->size, &a_tab->table,a_tab->size, &a_tab->data));
515     for (kk=0;kk<a_tab->size;kk++) a_tab->table[kk] = -1;
516     for (kk=0;kk<oldsize;kk++) {
517       if (oldtable[kk] != -1) {
518         PetscCall(PCGAMGHashTableAdd(a_tab, oldtable[kk], olddata[kk]));
519        }
520     }
521     PetscCall(PetscFree2(oldtable,olddata));
522     PetscCall(PCGAMGHashTableAdd(a_tab, a_key, a_data));
523   }
524   PetscFunctionReturn(0);
525 }
526