/*
 GAMG geometric-algebraic multigrid PC - Mark Adams 2011
 */
#include <petsc/private/matimpl.h>
#include <../src/ksp/pc/impls/gamg/gamg.h> /*I "petscpc.h" I*/
#include <petsc/private/kspimpl.h>

/*
   MatCollapseRow - produce the set of block column indices of one matrix row,
   one index per block (size bs) represented in the original row.

   NOTE(review): the dedup below only keeps a new entry when it is strictly larger
   than the previous one, so this relies on MatGetRow returning column indices in
   ascending order (true for AIJ) -- confirm for any other base type.

   n - the number of block indices in cc[]
   cc - the block indices (must be large enough to contain the indices)
*/
static inline PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc)
{
  PetscInt       cnt = -1,nidx,j;  /* cnt = -1 so an empty row yields *n = 0 */
  const PetscInt *idx;

  PetscFunctionBegin;
  PetscCall(MatGetRow(Amat,row,&nidx,&idx,NULL));
  if (nidx) {
    cnt = 0;
    cc[cnt] = idx[0]/bs;
    for (j=1; j<nidx; j++) {
      /* append a block index only when it differs from the last one recorded */
      if (cc[cnt] < idx[j]/bs) cc[++cnt] = idx[j]/bs;
    }
  }
  PetscCall(MatRestoreRow(Amat,row,&nidx,&idx,NULL));
  *n = cnt+1;
  PetscFunctionReturn(0);
}

/*
   MatCollapseRows - produce the set of block column indices of a block row
   (the bs consecutive rows starting at 'start'), one index per block
   represented in the original set of rows.

   w0,w1,w2    - caller-provided integer workspaces, each large enough for the
                 merged index set
   ncollapsed  - the number of block indices
   collapsed   - the block indices (points into one of the workspaces; may be NULL
                 if only the count is wanted)
*/
static inline PetscErrorCode MatCollapseRows(Mat Amat,PetscInt start,PetscInt bs,PetscInt *w0,PetscInt *w1,PetscInt *w2,PetscInt *ncollapsed,PetscInt **collapsed)
{
  PetscInt i,nprev,*cprev = w0,ncur = 0,*ccur = w1,*merged = w2,*cprevtmp;

  PetscFunctionBegin;
  PetscCall(MatCollapseRow(Amat,start,bs,&nprev,cprev));
  for (i=start+1; i<start+bs; i++) {
    PetscCall(MatCollapseRow(Amat,i,bs,&ncur,ccur));
    PetscCall(PetscMergeIntArray(nprev,cprev,ncur,ccur,&nprev,&merged));
    /* ping-pong the 'previous' and 'merged' buffers for the next merge */
    cprevtmp = cprev; cprev = merged; merged = cprevtmp;
  }
  *ncollapsed = nprev;
  if (collapsed) *collapsed = cprev;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   PCGAMGCreateGraph - create simple scaled scalar graph from matrix

   Each bs x bs block of Amat is collapsed to a single scalar entry whose value
   is the sum of the absolute values of the real parts of the block's entries
   (a cheap block norm). For bs == 1 the matrix is simply duplicated.

 Input Parameter:
 . Amat - matrix ((MPI)AIJ only)
 Output Parameter:
 . a_Gmat - output scalar graph (symmetric?)
*/
PetscErrorCode PCGAMGCreateGraph(Mat Amat, Mat *a_Gmat)
{
  PetscInt  Istart,Iend,Ii,jj,kk,ncols,nloc,NN,MM,bs;
  MPI_Comm  comm;
  Mat       Gmat;
  PetscBool ismpiaij,isseqaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
  PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
  PetscCall(MatGetSize(Amat, &MM, &NN));
  PetscCall(MatGetBlockSize(Amat, &bs));
  nloc = (Iend-Istart)/bs; /* number of local block rows */

  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij));
  PetscCheck(isseqaij || ismpiaij,PETSC_COMM_WORLD,PETSC_ERR_USER,"Require (MPI)AIJ matrix type");

  /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
  /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
     implementation */
  PetscCall(MatViewFromOptions(Amat, NULL, "-g_mat_view"));
  if (bs > 1 && (isseqaij || ((Mat_MPIAIJ*)Amat->data)->garray)) {
    /* Fast path: only valid when every block is dense (all bs rows of a block
       have identical block-column structure); falls back to 'old_bs' otherwise. */
    PetscInt  *d_nnz, *o_nnz;
    Mat       a, b, c;
    MatScalar *aa,val,AA[4096];     /* per-block-row value buffer (stack) */
    PetscInt  *aj,*ai,AJ[4096],nc;  /* per-block-row column buffer (stack) */
    PetscCall(PetscInfo(Amat,"New bs>1 PCGAMGCreateGraph. nloc=%" PetscInt_FMT "\n",nloc));
    if (isseqaij) {
      a = Amat; b = NULL;
    } else {
      Mat_MPIAIJ *d = (Mat_MPIAIJ*)Amat->data;
      a = d->A; b = d->B; /* diagonal and off-diagonal SeqAIJ pieces */
    }
    PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
    /* count block nnz per block row for preallocation, and verify dense-block structure */
    for (c=a, kk=0 ; c && kk<2 ; c=b, kk++) {
      PetscInt       *nnz = (c==a) ? d_nnz : o_nnz, nmax=0;
      const PetscInt *cols;
      for (PetscInt brow=0,jj,ok=1,j0; brow < nloc*bs; brow += bs) { // block rows
        PetscCall(MatGetRow(c,brow,&jj,&cols,NULL));
        nnz[brow/bs] = jj/bs;
        if (jj%bs) ok = 0; /* row length not a multiple of bs: blocks not dense */
        if (cols) j0 = cols[0];
        else j0 = -1;
        PetscCall(MatRestoreRow(c,brow,&jj,&cols,NULL));
        if (nnz[brow/bs]>nmax) nmax = nnz[brow/bs];
        for (PetscInt ii=1; ii < bs && nnz[brow/bs] ; ii++) { // check for non-dense blocks
          PetscCall(MatGetRow(c,brow+ii,&jj,&cols,NULL));
          if (jj%bs) ok = 0;
          if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0; /* first col must match row 0 of block */
          if (nnz[brow/bs] != jj/bs) ok = 0;                          /* same block count in every row */
          PetscCall(MatRestoreRow(c,brow+ii,&jj,&cols,NULL));
        }
        if (!ok) {
          /* structure not block-dense: release counts and use the general path below */
          PetscCall(PetscFree2(d_nnz,o_nnz));
          goto old_bs;
        }
      }
      PetscCheck(nmax<4096,PETSC_COMM_SELF,PETSC_ERR_USER,"Buffer %" PetscInt_FMT " too small 4096.",nmax);
    }
    PetscCall(MatCreate(comm, &Gmat));
    PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(Gmat, 1, 1));
    PetscCall(MatSetType(Gmat, MATAIJ));
    PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
    PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
    PetscCall(PetscFree2(d_nnz,o_nnz));
    // diag: walk the SeqAIJ CSR arrays of the diagonal piece directly
    for (PetscInt brow=0,n,grow; brow < nloc*bs; brow += bs) { // block rows
      Mat_SeqAIJ *aseq = (Mat_SeqAIJ*)a->data;
      ai = aseq->i;
      n  = ai[brow+1] - ai[brow];
      aj = aseq->j + ai[brow];
      for (int k=0; k<n; k += bs) { // block columns
        AJ[k/bs] = aj[k]/bs + Istart/bs; // diag starts at (Istart,Istart)
        val = 0;
        for (int ii=0; ii<bs; ii++) { // rows in block
          aa = aseq->a + ai[brow+ii] + k;
          for (int jj=0; jj<bs; jj++) { // columns in block
            val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
          }
        }
        AA[k/bs] = val;
      }
      grow = Istart/bs + brow/bs;
      PetscCall(MatSetValues(Gmat,1,&grow,n/bs,AJ,AA,INSERT_VALUES));
    }
    // off-diag: map local off-diagonal columns to global block columns via garray
    if (ismpiaij) {
      Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)Amat->data;
      const PetscScalar *vals;
      const PetscInt    *cols, *garray = aij->garray;
      PetscCheck(garray,PETSC_COMM_SELF,PETSC_ERR_USER,"No garray ?");
      for (PetscInt brow=0,grow; brow < nloc*bs; brow += bs) { // block rows
        PetscCall(MatGetRow(b,brow,&ncols,&cols,NULL));
        for (int k=0,cidx=0; k<ncols; k += bs,cidx++) {
          AA[k/bs] = 0;
          AJ[cidx] = garray[cols[k]]/bs; /* global block column */
        }
        nc = ncols/bs;
        PetscCall(MatRestoreRow(b,brow,&ncols,&cols,NULL));
        for (int ii=0; ii<bs; ii++) { // rows in block
          PetscCall(MatGetRow(b,brow+ii,&ncols,&cols,&vals));
          for (int k=0; k<ncols; k += bs) {
            for (int jj=0; jj<bs; jj++) { // cols in block
              AA[k/bs] += PetscAbs(PetscRealPart(vals[k+jj]));
            }
          }
          PetscCall(MatRestoreRow(b,brow+ii,&ncols,&cols,&vals));
        }
        grow = Istart/bs + brow/bs;
        PetscCall(MatSetValues(Gmat,1,&grow,nc,AJ,AA,INSERT_VALUES));
      }
    }
    PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
    PetscCall(MatViewFromOptions(Gmat, NULL, "-g_mat_view"));
  } else if (bs > 1) {
    const PetscScalar *vals;
    const PetscInt    *idx;
    PetscInt          *d_nnz, *o_nnz,*w0,*w1,*w2;

old_bs:
    /*
      General (slow) path: determine the preallocation needed for the scalar
      matrix derived from the vector (blocked) matrix, then accumulate block
      norms with ADD_VALUES one scalar entry at a time.
    */

    PetscCall(PetscInfo(Amat,"OLD bs>1 PCGAMGCreateGraph\n"));
    /* re-query type flags: the goto may land here from the fast path */
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij));
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij));
    PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));

    if (isseqaij) {
      PetscInt max_d_nnz;

      /*
        Determine exact preallocation count for (sequential) scalar matrix
      */
      PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz));
      max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
      PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
      for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) {
        PetscCall(MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
      }
      PetscCall(PetscFree3(w0,w1,w2));

    } else if (ismpiaij) {
      Mat            Daij,Oaij;
      const PetscInt *garray;
      PetscInt       max_d_nnz;

      PetscCall(MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray));

      /*
        Determine exact preallocation count for diagonal block portion of scalar matrix
      */
      PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz));
      max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
      PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
      for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
        PetscCall(MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
      }
      PetscCall(PetscFree3(w0,w1,w2));

      /*
        Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
      */
      for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
        o_nnz[jj] = 0;
        for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */
          PetscCall(MatGetRow(Oaij,Ii+kk,&ncols,NULL,NULL));
          o_nnz[jj] += ncols;
          PetscCall(MatRestoreRow(Oaij,Ii+kk,&ncols,NULL,NULL));
        }
        if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc; /* clamp to possible max */
      }

    } else SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_USER,"Require AIJ matrix type");

    /* get scalar copy (norms) of matrix */
    PetscCall(MatCreate(comm, &Gmat));
    PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(Gmat, 1, 1));
    PetscCall(MatSetType(Gmat, MATAIJ));
    PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
    PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
    PetscCall(PetscFree2(d_nnz,o_nnz));

    /* ADD_VALUES accumulates all entries of a block into one scalar entry */
    for (Ii = Istart; Ii < Iend; Ii++) {
      PetscInt dest_row = Ii/bs;
      PetscCall(MatGetRow(Amat,Ii,&ncols,&idx,&vals));
      for (jj=0; jj<ncols; jj++) {
        PetscInt    dest_col = idx[jj]/bs;
        PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
        PetscCall(MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES));
      }
      PetscCall(MatRestoreRow(Amat,Ii,&ncols,&idx,&vals));
    }
    PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
    PetscCall(MatViewFromOptions(Gmat, NULL, "-g_mat_view"));
  } else {
    /* just copy scalar matrix - abs() not taken here but scaled later */
    PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
  }
  PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));

  *a_Gmat = Gmat;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*@C
   PCGAMGFilterGraph - filter (remove zero and possibly small values from the) graph and make it symmetric if requested

   Collective on Mat

   Input Parameters:
+  a_Gmat - the graph
.  vfilter - threshold parameter [0,1)
-  symm - make the result symmetric

   Level: developer

   Notes:
    This is called before graph coarseners are called.
.seealso: PCGAMGSetThreshold()
@*/
PetscErrorCode PCGAMGFilterGraph(Mat *a_Gmat,PetscReal vfilter,PetscBool symm)
{
  PetscInt          Istart,Iend,Ii,jj,ncols,nnz0,nnz1, NN, MM, nloc;
  PetscMPIInt       rank;
  Mat               Gmat = *a_Gmat, tGmat; /* tGmat replaces *a_Gmat on return */
  MPI_Comm          comm;
  const PetscScalar *vals;
  const PetscInt    *idx;
  PetscInt          *d_nnz, *o_nnz;
  Vec               diag;

  PetscFunctionBegin;
  /* TODO GPU: optimization proposal, each class provides fast implementation of this
     procedure via MatAbs API */
  if (vfilter < 0.0 && !symm) {
    /* Just use the provided matrix as the graph but make all values positive */
    MatInfo     info;
    PetscScalar *avals;
    PetscBool   isaij,ismpiaij;
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATSEQAIJ,&isaij));
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATMPIAIJ,&ismpiaij));
    PetscCheck(isaij || ismpiaij,PETSC_COMM_WORLD,PETSC_ERR_USER,"Require (MPI)AIJ matrix type");
    if (isaij) {
      /* take abs() of every stored value in place */
      PetscCall(MatGetInfo(Gmat,MAT_LOCAL,&info));
      PetscCall(MatSeqAIJGetArray(Gmat,&avals));
      for (jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
      PetscCall(MatSeqAIJRestoreArray(Gmat,&avals));
    } else {
      /* MPIAIJ: process diagonal (A) and off-diagonal (B) pieces separately */
      Mat_MPIAIJ *aij = (Mat_MPIAIJ*)Gmat->data;
      PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info));
      PetscCall(MatSeqAIJGetArray(aij->A,&avals));
      for (jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
      PetscCall(MatSeqAIJRestoreArray(aij->A,&avals));
      PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info));
      PetscCall(MatSeqAIJGetArray(aij->B,&avals));
      for (jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
      PetscCall(MatSeqAIJRestoreArray(aij->B,&avals));
    }
    PetscFunctionReturn(0);
  }

  /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
               Also, if the matrix is symmetric, can we skip this operation? It can be very expensive on large matrices. */
  PetscCall(PetscObjectGetComm((PetscObject)Gmat,&comm));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
  nloc = Iend - Istart;
  PetscCall(MatGetSize(Gmat, &MM, &NN));

  if (symm) {
    /* symmetrize: G <- G + G^T */
    Mat matTrans;
    PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
    PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
    PetscCall(MatDestroy(&matTrans));
  }

  /* scale Gmat for all values between -1 and 1 */
  PetscCall(MatCreateVecs(Gmat, &diag, NULL));
  PetscCall(MatGetDiagonal(Gmat, diag));
  PetscCall(VecReciprocal(diag));
  PetscCall(VecSqrtAbs(diag));
  /* D^{-1/2} G D^{-1/2}: diagonal entries become 1, off-diagonals are relative */
  PetscCall(MatDiagonalScale(Gmat, diag, diag));
  PetscCall(VecDestroy(&diag));

  /* Determine upper bound on nonzeros needed in new filtered matrix */
  PetscCall(PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz));
  for (Ii = Istart, jj = 0; Ii < Iend; Ii++, jj++) {
    PetscCall(MatGetRow(Gmat,Ii,&ncols,NULL,NULL));
    d_nnz[jj] = ncols;
    o_nnz[jj] = ncols;
    PetscCall(MatRestoreRow(Gmat,Ii,&ncols,NULL,NULL));
    /* clamp over-estimates to the possible maxima */
    if (d_nnz[jj] > nloc) d_nnz[jj] = nloc;
    if (o_nnz[jj] > (MM-nloc)) o_nnz[jj] = MM - nloc;
  }
  PetscCall(MatCreate(comm, &tGmat));
  PetscCall(MatSetSizes(tGmat,nloc,nloc,MM,MM));
  PetscCall(MatSetBlockSizes(tGmat, 1, 1));
  PetscCall(MatSetType(tGmat, MATAIJ));
  PetscCall(MatSeqAIJSetPreallocation(tGmat,0,d_nnz));
  PetscCall(MatMPIAIJSetPreallocation(tGmat,0,d_nnz,0,o_nnz));
  /* all insertions below are into locally owned rows */
  PetscCall(MatSetOption(tGmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  PetscCall(PetscFree2(d_nnz,o_nnz));

  /* copy entries above the threshold; nnz0/nnz1 count before/after for stats */
  for (Ii = Istart, nnz0 = nnz1 = 0; Ii < Iend; Ii++) {
    PetscCall(MatGetRow(Gmat,Ii,&ncols,&idx,&vals));
    for (jj=0; jj<ncols; jj++,nnz0++) {
      PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
      if (PetscRealPart(sv) > vfilter) {
        nnz1++;
        PetscCall(MatSetValues(tGmat,1,&Ii,1,&idx[jj],&sv,INSERT_VALUES));
      }
    }
    PetscCall(MatRestoreRow(Gmat,Ii,&ncols,&idx,&vals));
  }
  PetscCall(MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY));
  if (symm) {
    PetscCall(MatSetOption(tGmat,MAT_SYMMETRIC,PETSC_TRUE));
  } else {
    PetscCall(MatPropagateSymmetryOptions(Gmat,tGmat));
  }

#if defined(PETSC_USE_INFO)
  {
    double t1 = (!nnz0) ? 1. : 100.*(double)nnz1/(double)nnz0, t2 = (!nloc) ? 1. : (double)nnz0/(double)nloc;
    PetscCall(PetscInfo(*a_Gmat,"\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ")\n",t1,(double)vfilter,t2,MM));
  }
#endif
  PetscCall(MatDestroy(&Gmat));
  *a_Gmat = tGmat;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   PCGAMGGetDataWithGhosts - hacks into Mat MPIAIJ so this must have size > 1

   Input Parameter:
   . Gmat - MPIAIJ matrix for scatters
   . data_sz - number of data terms per node (# cols in output)
   . data_in[nloc*data_sz] - column oriented data
   Output Parameter:
   . a_stride - number of rows of output
   .
a_data_out[stride*data_sz] - output data with ghosts 419 */ 420 PetscErrorCode PCGAMGGetDataWithGhosts(Mat Gmat,PetscInt data_sz,PetscReal data_in[],PetscInt *a_stride,PetscReal **a_data_out) 421 { 422 Vec tmp_crds; 423 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ*)Gmat->data; 424 PetscInt nnodes,num_ghosts,dir,kk,jj,my0,Iend,nloc; 425 PetscScalar *data_arr; 426 PetscReal *datas; 427 PetscBool isMPIAIJ; 428 429 PetscFunctionBegin; 430 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATMPIAIJ, &isMPIAIJ)); 431 PetscCall(MatGetOwnershipRange(Gmat, &my0, &Iend)); 432 nloc = Iend - my0; 433 PetscCall(VecGetLocalSize(mpimat->lvec, &num_ghosts)); 434 nnodes = num_ghosts + nloc; 435 *a_stride = nnodes; 436 PetscCall(MatCreateVecs(Gmat, &tmp_crds, NULL)); 437 438 PetscCall(PetscMalloc1(data_sz*nnodes, &datas)); 439 for (dir=0; dir<data_sz; dir++) { 440 /* set local, and global */ 441 for (kk=0; kk<nloc; kk++) { 442 PetscInt gid = my0 + kk; 443 PetscScalar crd = (PetscScalar)data_in[dir*nloc + kk]; /* col oriented */ 444 datas[dir*nnodes + kk] = PetscRealPart(crd); 445 446 PetscCall(VecSetValues(tmp_crds, 1, &gid, &crd, INSERT_VALUES)); 447 } 448 PetscCall(VecAssemblyBegin(tmp_crds)); 449 PetscCall(VecAssemblyEnd(tmp_crds)); 450 /* get ghost datas */ 451 PetscCall(VecScatterBegin(mpimat->Mvctx,tmp_crds,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 452 PetscCall(VecScatterEnd(mpimat->Mvctx,tmp_crds,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 453 PetscCall(VecGetArray(mpimat->lvec, &data_arr)); 454 for (kk=nloc,jj=0;jj<num_ghosts;kk++,jj++) datas[dir*nnodes + kk] = PetscRealPart(data_arr[jj]); 455 PetscCall(VecRestoreArray(mpimat->lvec, &data_arr)); 456 } 457 PetscCall(VecDestroy(&tmp_crds)); 458 *a_data_out = datas; 459 PetscFunctionReturn(0); 460 } 461 462 PetscErrorCode PCGAMGHashTableCreate(PetscInt a_size, PCGAMGHashTable *a_tab) 463 { 464 PetscInt kk; 465 466 PetscFunctionBegin; 467 a_tab->size = a_size; 468 PetscCall(PetscMalloc2(a_size, &a_tab->table,a_size, 
&a_tab->data)); 469 for (kk=0; kk<a_size; kk++) a_tab->table[kk] = -1; 470 PetscFunctionReturn(0); 471 } 472 473 PetscErrorCode PCGAMGHashTableDestroy(PCGAMGHashTable *a_tab) 474 { 475 PetscFunctionBegin; 476 PetscCall(PetscFree2(a_tab->table,a_tab->data)); 477 PetscFunctionReturn(0); 478 } 479 480 PetscErrorCode PCGAMGHashTableAdd(PCGAMGHashTable *a_tab, PetscInt a_key, PetscInt a_data) 481 { 482 PetscInt kk,idx; 483 484 PetscFunctionBegin; 485 PetscCheck(a_key>=0,PETSC_COMM_SELF,PETSC_ERR_USER,"Negative key %" PetscInt_FMT ".",a_key); 486 for (kk = 0, idx = GAMG_HASH(a_key); kk < a_tab->size; kk++, idx = (idx==(a_tab->size-1)) ? 0 : idx + 1) { 487 if (a_tab->table[idx] == a_key) { 488 /* exists */ 489 a_tab->data[idx] = a_data; 490 break; 491 } else if (a_tab->table[idx] == -1) { 492 /* add */ 493 a_tab->table[idx] = a_key; 494 a_tab->data[idx] = a_data; 495 break; 496 } 497 } 498 if (kk==a_tab->size) { 499 /* this is not to efficient, waiting until completely full */ 500 PetscInt oldsize = a_tab->size, new_size = 2*a_tab->size + 5, *oldtable = a_tab->table, *olddata = a_tab->data; 501 502 a_tab->size = new_size; 503 PetscCall(PetscMalloc2(a_tab->size, &a_tab->table,a_tab->size, &a_tab->data)); 504 for (kk=0;kk<a_tab->size;kk++) a_tab->table[kk] = -1; 505 for (kk=0;kk<oldsize;kk++) { 506 if (oldtable[kk] != -1) { 507 PetscCall(PCGAMGHashTableAdd(a_tab, oldtable[kk], olddata[kk])); 508 } 509 } 510 PetscCall(PetscFree2(oldtable,olddata)); 511 PetscCall(PCGAMGHashTableAdd(a_tab, a_key, a_data)); 512 } 513 PetscFunctionReturn(0); 514 } 515