xref: /petsc/src/ksp/pc/impls/gamg/util.c (revision c3b5f7ba6bc5ce25a01a67bb37ba5d34b02bbbd7)
1 /*
2  GAMG geometric-algebric multigrid PC - Mark Adams 2011
3  */
4 #include <petsc/private/matimpl.h>
5 #include <../src/ksp/pc/impls/gamg/gamg.h>           /*I "petscpc.h" I*/
6 #include <petsc/private/kspimpl.h>
7 
8 /*
9    Produces a set of block column indices of the matrix row, one for each block represented in the original row
10 
11    n - the number of block indices in cc[]
12    cc - the block indices (must be large enough to contain the indices)
13 */
14 static inline PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc)
15 {
16   PetscInt       cnt = -1,nidx,j;
17   const PetscInt *idx;
18 
19   PetscFunctionBegin;
20   PetscCall(MatGetRow(Amat,row,&nidx,&idx,NULL));
21   if (nidx) {
22     cnt = 0;
23     cc[cnt] = idx[0]/bs;
24     for (j=1; j<nidx; j++) {
25       if (cc[cnt] < idx[j]/bs) cc[++cnt] = idx[j]/bs;
26     }
27   }
28   PetscCall(MatRestoreRow(Amat,row,&nidx,&idx,NULL));
29   *n = cnt+1;
30   PetscFunctionReturn(0);
31 }
32 
33 /*
34     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
35 
36     ncollapsed - the number of block indices
37     collapsed - the block indices (must be large enough to contain the indices)
38 */
39 static inline PetscErrorCode MatCollapseRows(Mat Amat,PetscInt start,PetscInt bs,PetscInt *w0,PetscInt *w1,PetscInt *w2,PetscInt *ncollapsed,PetscInt **collapsed)
40 {
41   PetscInt       i,nprev,*cprev = w0,ncur = 0,*ccur = w1,*merged = w2,*cprevtmp;
42 
43   PetscFunctionBegin;
44   PetscCall(MatCollapseRow(Amat,start,bs,&nprev,cprev));
45   for (i=start+1; i<start+bs; i++) {
46     PetscCall(MatCollapseRow(Amat,i,bs,&ncur,ccur));
47     PetscCall(PetscMergeIntArray(nprev,cprev,ncur,ccur,&nprev,&merged));
48     cprevtmp = cprev; cprev = merged; merged = cprevtmp;
49   }
50   *ncollapsed = nprev;
51   if (collapsed) *collapsed  = cprev;
52   PetscFunctionReturn(0);
53 }
54 
55 /* -------------------------------------------------------------------------- */
56 /*
57    PCGAMGCreateGraph - create simple scaled scalar graph from matrix
58 
59  Input Parameter:
60  . Amat - matrix
61  Output Parameter:
62  . a_Gmaat - eoutput scalar graph (symmetric?)
63  */
64 PetscErrorCode PCGAMGCreateGraph(Mat Amat, Mat *a_Gmat)
65 {
66   PetscInt       Istart,Iend,Ii,jj,kk,ncols,nloc,NN,MM,bs;
67   MPI_Comm       comm;
68   Mat            Gmat;
69   PetscBool      ismpiaij,isseqaij;
70 
71   PetscFunctionBegin;
72   PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
73   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
74   PetscCall(MatGetSize(Amat, &MM, &NN));
75   PetscCall(MatGetBlockSize(Amat, &bs));
76   nloc = (Iend-Istart)/bs;
77 
78   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij));
79   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij));
80   PetscCheck(isseqaij || ismpiaij,PETSC_COMM_WORLD,PETSC_ERR_USER,"Require (MPI)AIJ matrix type");
81   PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GRAPH],0,0,0,0));
82 
83   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
84   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
85      implementation */
86   PetscCall(MatViewFromOptions(Amat, NULL, "-g_mat_view"));
87   if (bs > 1 && (isseqaij || ((Mat_MPIAIJ*)Amat->data)->garray)) {
88     PetscInt  *d_nnz, *o_nnz;
89     Mat       a, b, c;
90     MatScalar *aa,val,AA[4096];
91     PetscInt  *aj,*ai,AJ[4096],nc;
92     PetscCall(PetscInfo(Amat,"New bs>1 PCGAMGCreateGraph. nloc=%D\n",nloc));
93     if (isseqaij) {
94       a = Amat; b = NULL;
95     }
96     else {
97       Mat_MPIAIJ *d = (Mat_MPIAIJ*)Amat->data;
98       a = d->A; b = d->B;
99     }
100     PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
101     for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
102       PetscInt       *nnz = (c==a) ? d_nnz : o_nnz, nmax=0;
103       const PetscInt *cols;
104       for (PetscInt brow=0,jj,ok=1,j0; brow < nloc*bs; brow += bs) { // block rows
105         PetscCall(MatGetRow(c,brow,&jj,&cols,NULL));
106         nnz[brow/bs] = jj/bs;
107         if (jj%bs) ok = 0;
108         if (cols) j0 = cols[0];
109         else j0 = -1;
110         PetscCall(MatRestoreRow(c,brow,&jj,&cols,NULL));
111         if (nnz[brow/bs]>nmax) nmax = nnz[brow/bs];
112         for (PetscInt ii=1; ii < bs && nnz[brow/bs] ; ii++) { // check for non-dense blocks
113           PetscCall(MatGetRow(c,brow+ii,&jj,&cols,NULL));
114           if (jj%bs) ok = 0;
115           if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0;
116           if (nnz[brow/bs] != jj/bs) ok = 0;
117           PetscCall(MatRestoreRow(c,brow+ii,&jj,&cols,NULL));
118         }
119         if (!ok) {
120           PetscCall(PetscFree2(d_nnz,o_nnz));
121           goto old_bs;
122         }
123       }
124       PetscCheck(nmax<4096,PETSC_COMM_SELF,PETSC_ERR_USER,"Buffer %D too small %D.",nmax,4096);
125     }
126     PetscCall(MatCreate(comm, &Gmat));
127     PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE));
128     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
129     PetscCall(MatSetType(Gmat, MATAIJ));
130     PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
131     PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
132     PetscCall(PetscFree2(d_nnz,o_nnz));
133     // diag
134     for (PetscInt brow=0,n,grow; brow < nloc*bs; brow += bs) { // block rows
135       Mat_SeqAIJ *aseq  = (Mat_SeqAIJ*)a->data;
136       ai = aseq->i;
137       n  = ai[brow+1] - ai[brow];
138       aj = aseq->j + ai[brow];
139       for (int k=0; k<n; k += bs) { // block columns
140         AJ[k/bs] = aj[k]/bs + Istart/bs; // diag starts at (Istart,Istart)
141         val = 0;
142         for (int ii=0; ii<bs; ii++) { // rows in block
143           aa = aseq->a + ai[brow+ii] + k;
144           for (int jj=0; jj<bs; jj++) { // columns in block
145             val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
146           }
147         }
148         AA[k/bs] = val;
149       }
150       grow = Istart/bs + brow/bs;
151       PetscCall(MatSetValues(Gmat,1,&grow,n/bs,AJ,AA,INSERT_VALUES));
152     }
153     // off-diag
154     if (ismpiaij) {
155       Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)Amat->data;
156       const PetscScalar *vals;
157       const PetscInt    *cols, *garray = aij->garray;
158       PetscCheck(garray,PETSC_COMM_SELF,PETSC_ERR_USER,"No garray ?");
159       for (PetscInt brow=0,grow; brow < nloc*bs; brow += bs) { // block rows
160         PetscCall(MatGetRow(b,brow,&ncols,&cols,NULL));
161         for (int k=0,cidx=0; k<ncols; k += bs,cidx++) {
162           AA[k/bs] = 0;
163           AJ[cidx] = garray[cols[k]]/bs;
164         }
165         nc = ncols/bs;
166         PetscCall(MatRestoreRow(b,brow,&ncols,&cols,NULL));
167         for (int ii=0; ii<bs; ii++) { // rows in block
168           PetscCall(MatGetRow(b,brow+ii,&ncols,&cols,&vals));
169           for (int k=0; k<ncols; k += bs) {
170             for (int jj=0; jj<bs; jj++) { // cols in block
171               AA[k/bs] += PetscAbs(PetscRealPart(vals[k+jj]));
172             }
173           }
174           PetscCall(MatRestoreRow(b,brow+ii,&ncols,&cols,&vals));
175         }
176         grow = Istart/bs + brow/bs;
177         PetscCall(MatSetValues(Gmat,1,&grow,nc,AJ,AA,INSERT_VALUES));
178       }
179     }
180     PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
181     PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
182     PetscCall(MatViewFromOptions(Gmat, NULL, "-g_mat_view"));
183   } else if (bs > 1) {
184     const PetscScalar *vals;
185     const PetscInt    *idx;
186     PetscInt          *d_nnz, *o_nnz,*w0,*w1,*w2;
187 
188 old_bs:
189     /*
190        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
191     */
192 
193     PetscCall(PetscInfo(Amat,"OLD bs>1 PCGAMGCreateGraph\n"));
194     PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij));
195     PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij));
196     PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
197 
198     if (isseqaij) {
199       PetscInt max_d_nnz;
200 
201       /*
202           Determine exact preallocation count for (sequential) scalar matrix
203       */
204       PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz));
205       max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
206       PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
207       for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) {
208         PetscCall(MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
209       }
210       PetscCall(PetscFree3(w0,w1,w2));
211 
212     } else if (ismpiaij) {
213       Mat            Daij,Oaij;
214       const PetscInt *garray;
215       PetscInt       max_d_nnz;
216 
217       PetscCall(MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray));
218 
219       /*
220           Determine exact preallocation count for diagonal block portion of scalar matrix
221       */
222       PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz));
223       max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
224       PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
225       for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
226         PetscCall(MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
227       }
228       PetscCall(PetscFree3(w0,w1,w2));
229 
230       /*
231          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
232       */
233       for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
234         o_nnz[jj] = 0;
235         for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */
236           PetscCall(MatGetRow(Oaij,Ii+kk,&ncols,NULL,NULL));
237           o_nnz[jj] += ncols;
238           PetscCall(MatRestoreRow(Oaij,Ii+kk,&ncols,NULL,NULL));
239         }
240         if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc;
241       }
242 
243     } else SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_USER,"Require AIJ matrix type");
244 
245     /* get scalar copy (norms) of matrix */
246     PetscCall(MatCreate(comm, &Gmat));
247     PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE));
248     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
249     PetscCall(MatSetType(Gmat, MATAIJ));
250     PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
251     PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
252     PetscCall(PetscFree2(d_nnz,o_nnz));
253 
254     for (Ii = Istart; Ii < Iend; Ii++) {
255       PetscInt dest_row = Ii/bs;
256       PetscCall(MatGetRow(Amat,Ii,&ncols,&idx,&vals));
257       for (jj=0; jj<ncols; jj++) {
258         PetscInt    dest_col = idx[jj]/bs;
259         PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
260         PetscCall(MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES));
261       }
262       PetscCall(MatRestoreRow(Amat,Ii,&ncols,&idx,&vals));
263     }
264     PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
265     PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
266     PetscCall(MatViewFromOptions(Gmat, NULL, "-g_mat_view"));
267   } else {
268     /* just copy scalar matrix - abs() not taken here but scaled later */
269     PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
270   }
271   PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
272 
273   PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GRAPH],0,0,0,0));
274 
275   *a_Gmat = Gmat;
276   PetscFunctionReturn(0);
277 }
278 
279 /* -------------------------------------------------------------------------- */
280 /*@C
281    PCGAMGFilterGraph - filter (remove zero and possibly small values from the) graph and make it symmetric if requested
282 
283    Collective on Mat
284 
285    Input Parameters:
286 +   a_Gmat - the graph
287 .   vfilter - threshold parameter [0,1)
288 -   symm - make the result symmetric
289 
290    Level: developer
291 
292    Notes:
293     This is called before graph coarsers are called.
294 
295 .seealso: PCGAMGSetThreshold()
296 @*/
297 PetscErrorCode PCGAMGFilterGraph(Mat *a_Gmat,PetscReal vfilter,PetscBool symm)
298 {
299   PetscInt          Istart,Iend,Ii,jj,ncols,nnz0,nnz1, NN, MM, nloc;
300   PetscMPIInt       rank;
301   Mat               Gmat  = *a_Gmat, tGmat;
302   MPI_Comm          comm;
303   const PetscScalar *vals;
304   const PetscInt    *idx;
305   PetscInt          *d_nnz, *o_nnz;
306   Vec               diag;
307 
308   PetscFunctionBegin;
309   PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[SET16],0,0,0,0));
310 
311   /* TODO GPU: optimization proposal, each class provides fast implementation of this
312      procedure via MatAbs API */
313   if (vfilter < 0.0 && !symm) {
314     /* Just use the provided matrix as the graph but make all values positive */
315     MatInfo     info;
316     PetscScalar *avals;
317     PetscBool isaij,ismpiaij;
318     PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATSEQAIJ,&isaij));
319     PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATMPIAIJ,&ismpiaij));
320     PetscCheck(isaij || ismpiaij,PETSC_COMM_WORLD,PETSC_ERR_USER,"Require (MPI)AIJ matrix type");
321     if (isaij) {
322       PetscCall(MatGetInfo(Gmat,MAT_LOCAL,&info));
323       PetscCall(MatSeqAIJGetArray(Gmat,&avals));
324       for (jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
325       PetscCall(MatSeqAIJRestoreArray(Gmat,&avals));
326     } else {
327       Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)Gmat->data;
328       PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info));
329       PetscCall(MatSeqAIJGetArray(aij->A,&avals));
330       for (jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
331       PetscCall(MatSeqAIJRestoreArray(aij->A,&avals));
332       PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info));
333       PetscCall(MatSeqAIJGetArray(aij->B,&avals));
334       for (jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
335       PetscCall(MatSeqAIJRestoreArray(aij->B,&avals));
336     }
337     PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[SET16],0,0,0,0));
338     PetscFunctionReturn(0);
339   }
340 
341   /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
342                Also, if the matrix is symmetric, can we skip this
343                operation? It can be very expensive on large matrices. */
344   PetscCall(PetscObjectGetComm((PetscObject)Gmat,&comm));
345   PetscCallMPI(MPI_Comm_rank(comm,&rank));
346   PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
347   nloc = Iend - Istart;
348   PetscCall(MatGetSize(Gmat, &MM, &NN));
349 
350   if (symm) {
351     Mat matTrans;
352     PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
353     PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
354     PetscCall(MatDestroy(&matTrans));
355   }
356 
357   /* scale Gmat for all values between -1 and 1 */
358   PetscCall(MatCreateVecs(Gmat, &diag, NULL));
359   PetscCall(MatGetDiagonal(Gmat, diag));
360   PetscCall(VecReciprocal(diag));
361   PetscCall(VecSqrtAbs(diag));
362   PetscCall(MatDiagonalScale(Gmat, diag, diag));
363   PetscCall(VecDestroy(&diag));
364 
365   /* Determine upper bound on nonzeros needed in new filtered matrix */
366   PetscCall(PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz));
367   for (Ii = Istart, jj = 0; Ii < Iend; Ii++, jj++) {
368     PetscCall(MatGetRow(Gmat,Ii,&ncols,NULL,NULL));
369     d_nnz[jj] = ncols;
370     o_nnz[jj] = ncols;
371     PetscCall(MatRestoreRow(Gmat,Ii,&ncols,NULL,NULL));
372     if (d_nnz[jj] > nloc) d_nnz[jj] = nloc;
373     if (o_nnz[jj] > (MM-nloc)) o_nnz[jj] = MM - nloc;
374   }
375   PetscCall(MatCreate(comm, &tGmat));
376   PetscCall(MatSetSizes(tGmat,nloc,nloc,MM,MM));
377   PetscCall(MatSetBlockSizes(tGmat, 1, 1));
378   PetscCall(MatSetType(tGmat, MATAIJ));
379   PetscCall(MatSeqAIJSetPreallocation(tGmat,0,d_nnz));
380   PetscCall(MatMPIAIJSetPreallocation(tGmat,0,d_nnz,0,o_nnz));
381   PetscCall(MatSetOption(tGmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
382   PetscCall(PetscFree2(d_nnz,o_nnz));
383 
384   for (Ii = Istart, nnz0 = nnz1 = 0; Ii < Iend; Ii++) {
385     PetscCall(MatGetRow(Gmat,Ii,&ncols,&idx,&vals));
386     for (jj=0; jj<ncols; jj++,nnz0++) {
387       PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
388       if (PetscRealPart(sv) > vfilter) {
389         nnz1++;
390         PetscCall(MatSetValues(tGmat,1,&Ii,1,&idx[jj],&sv,INSERT_VALUES));
391       }
392     }
393     PetscCall(MatRestoreRow(Gmat,Ii,&ncols,&idx,&vals));
394   }
395   PetscCall(MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY));
396   PetscCall(MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY));
397   if (symm) {
398     PetscCall(MatSetOption(tGmat,MAT_SYMMETRIC,PETSC_TRUE));
399   } else {
400     PetscCall(MatPropagateSymmetryOptions(Gmat,tGmat));
401   }
402   PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[SET16],0,0,0,0));
403 
404 #if defined(PETSC_USE_INFO)
405   {
406     double t1 = (!nnz0) ? 1. : 100.*(double)nnz1/(double)nnz0, t2 = (!nloc) ? 1. : (double)nnz0/(double)nloc;
407     PetscCall(PetscInfo(*a_Gmat,"\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%D)\n",t1,vfilter,t2,MM));
408   }
409 #endif
410   PetscCall(MatDestroy(&Gmat));
411   *a_Gmat = tGmat;
412   PetscFunctionReturn(0);
413 }
414 
415 /* -------------------------------------------------------------------------- */
416 /*
417    PCGAMGGetDataWithGhosts - hacks into Mat MPIAIJ so this must have size > 1
418 
419    Input Parameter:
420    . Gmat - MPIAIJ matrix for scattters
421    . data_sz - number of data terms per node (# cols in output)
422    . data_in[nloc*data_sz] - column oriented data
423    Output Parameter:
424    . a_stride - numbrt of rows of output
425    . a_data_out[stride*data_sz] - output data with ghosts
426 */
427 PetscErrorCode PCGAMGGetDataWithGhosts(Mat Gmat,PetscInt data_sz,PetscReal data_in[],PetscInt *a_stride,PetscReal **a_data_out)
428 {
429   Vec            tmp_crds;
430   Mat_MPIAIJ     *mpimat = (Mat_MPIAIJ*)Gmat->data;
431   PetscInt       nnodes,num_ghosts,dir,kk,jj,my0,Iend,nloc;
432   PetscScalar    *data_arr;
433   PetscReal      *datas;
434   PetscBool      isMPIAIJ;
435 
436   PetscFunctionBegin;
437   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATMPIAIJ, &isMPIAIJ));
438   PetscCall(MatGetOwnershipRange(Gmat, &my0, &Iend));
439   nloc      = Iend - my0;
440   PetscCall(VecGetLocalSize(mpimat->lvec, &num_ghosts));
441   nnodes    = num_ghosts + nloc;
442   *a_stride = nnodes;
443   PetscCall(MatCreateVecs(Gmat, &tmp_crds, NULL));
444 
445   PetscCall(PetscMalloc1(data_sz*nnodes, &datas));
446   for (dir=0; dir<data_sz; dir++) {
447     /* set local, and global */
448     for (kk=0; kk<nloc; kk++) {
449       PetscInt    gid = my0 + kk;
450       PetscScalar crd = (PetscScalar)data_in[dir*nloc + kk]; /* col oriented */
451       datas[dir*nnodes + kk] = PetscRealPart(crd);
452 
453       PetscCall(VecSetValues(tmp_crds, 1, &gid, &crd, INSERT_VALUES));
454     }
455     PetscCall(VecAssemblyBegin(tmp_crds));
456     PetscCall(VecAssemblyEnd(tmp_crds));
457     /* get ghost datas */
458     PetscCall(VecScatterBegin(mpimat->Mvctx,tmp_crds,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD));
459     PetscCall(VecScatterEnd(mpimat->Mvctx,tmp_crds,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD));
460     PetscCall(VecGetArray(mpimat->lvec, &data_arr));
461     for (kk=nloc,jj=0;jj<num_ghosts;kk++,jj++) datas[dir*nnodes + kk] = PetscRealPart(data_arr[jj]);
462     PetscCall(VecRestoreArray(mpimat->lvec, &data_arr));
463   }
464   PetscCall(VecDestroy(&tmp_crds));
465   *a_data_out = datas;
466   PetscFunctionReturn(0);
467 }
468 
469 PetscErrorCode PCGAMGHashTableCreate(PetscInt a_size, PCGAMGHashTable *a_tab)
470 {
471   PetscInt       kk;
472 
473   PetscFunctionBegin;
474   a_tab->size = a_size;
475   PetscCall(PetscMalloc2(a_size, &a_tab->table,a_size, &a_tab->data));
476   for (kk=0; kk<a_size; kk++) a_tab->table[kk] = -1;
477   PetscFunctionReturn(0);
478 }
479 
480 PetscErrorCode PCGAMGHashTableDestroy(PCGAMGHashTable *a_tab)
481 {
482   PetscFunctionBegin;
483   PetscCall(PetscFree2(a_tab->table,a_tab->data));
484   PetscFunctionReturn(0);
485 }
486 
487 PetscErrorCode PCGAMGHashTableAdd(PCGAMGHashTable *a_tab, PetscInt a_key, PetscInt a_data)
488 {
489   PetscInt kk,idx;
490 
491   PetscFunctionBegin;
492   PetscCheckFalse(a_key<0,PETSC_COMM_SELF,PETSC_ERR_USER,"Negative key %D.",a_key);
493   for (kk = 0, idx = GAMG_HASH(a_key); kk < a_tab->size; kk++, idx = (idx==(a_tab->size-1)) ? 0 : idx + 1) {
494     if (a_tab->table[idx] == a_key) {
495       /* exists */
496       a_tab->data[idx] = a_data;
497       break;
498     } else if (a_tab->table[idx] == -1) {
499       /* add */
500       a_tab->table[idx] = a_key;
501       a_tab->data[idx]  = a_data;
502       break;
503     }
504   }
505   if (kk==a_tab->size) {
506     /* this is not to efficient, waiting until completely full */
507     PetscInt       oldsize = a_tab->size, new_size = 2*a_tab->size + 5, *oldtable = a_tab->table, *olddata = a_tab->data;
508 
509     a_tab->size = new_size;
510     PetscCall(PetscMalloc2(a_tab->size, &a_tab->table,a_tab->size, &a_tab->data));
511     for (kk=0;kk<a_tab->size;kk++) a_tab->table[kk] = -1;
512     for (kk=0;kk<oldsize;kk++) {
513       if (oldtable[kk] != -1) {
514         PetscCall(PCGAMGHashTableAdd(a_tab, oldtable[kk], olddata[kk]));
515        }
516     }
517     PetscCall(PetscFree2(oldtable,olddata));
518     PetscCall(PCGAMGHashTableAdd(a_tab, a_key, a_data));
519   }
520   PetscFunctionReturn(0);
521 }
522