1 /* 2 GAMG geometric-algebric multigrid PC - Mark Adams 2011 3 */ 4 #include <petsc/private/matimpl.h> 5 #include <../src/ksp/pc/impls/gamg/gamg.h> /*I "petscpc.h" I*/ 6 #include <petsc/private/kspimpl.h> 7 8 /* 9 Produces a set of block column indices of the matrix row, one for each block represented in the original row 10 11 n - the number of block indices in cc[] 12 cc - the block indices (must be large enough to contain the indices) 13 */ 14 static inline PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc) 15 { 16 PetscInt cnt = -1,nidx,j; 17 const PetscInt *idx; 18 19 PetscFunctionBegin; 20 PetscCall(MatGetRow(Amat,row,&nidx,&idx,NULL)); 21 if (nidx) { 22 cnt = 0; 23 cc[cnt] = idx[0]/bs; 24 for (j=1; j<nidx; j++) { 25 if (cc[cnt] < idx[j]/bs) cc[++cnt] = idx[j]/bs; 26 } 27 } 28 PetscCall(MatRestoreRow(Amat,row,&nidx,&idx,NULL)); 29 *n = cnt+1; 30 PetscFunctionReturn(0); 31 } 32 33 /* 34 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 35 36 ncollapsed - the number of block indices 37 collapsed - the block indices (must be large enough to contain the indices) 38 */ 39 static inline PetscErrorCode MatCollapseRows(Mat Amat,PetscInt start,PetscInt bs,PetscInt *w0,PetscInt *w1,PetscInt *w2,PetscInt *ncollapsed,PetscInt **collapsed) 40 { 41 PetscInt i,nprev,*cprev = w0,ncur = 0,*ccur = w1,*merged = w2,*cprevtmp; 42 43 PetscFunctionBegin; 44 PetscCall(MatCollapseRow(Amat,start,bs,&nprev,cprev)); 45 for (i=start+1; i<start+bs; i++) { 46 PetscCall(MatCollapseRow(Amat,i,bs,&ncur,ccur)); 47 PetscCall(PetscMergeIntArray(nprev,cprev,ncur,ccur,&nprev,&merged)); 48 cprevtmp = cprev; cprev = merged; merged = cprevtmp; 49 } 50 *ncollapsed = nprev; 51 if (collapsed) *collapsed = cprev; 52 PetscFunctionReturn(0); 53 } 54 55 /* -------------------------------------------------------------------------- */ 56 /* 57 PCGAMGCreateGraph - create simple scaled scalar graph from matrix 58 59 Input Parameter: 60 . Amat - matrix 61 Output Parameter: 62 . a_Gmaat - eoutput scalar graph (symmetric?) 63 */ 64 PetscErrorCode PCGAMGCreateGraph(Mat Amat, Mat *a_Gmat) 65 { 66 PetscInt Istart,Iend,Ii,jj,kk,ncols,nloc,NN,MM,bs; 67 MPI_Comm comm; 68 Mat Gmat; 69 PetscBool ismpiaij,isseqaij; 70 71 PetscFunctionBegin; 72 PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm)); 73 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 74 PetscCall(MatGetSize(Amat, &MM, &NN)); 75 PetscCall(MatGetBlockSize(Amat, &bs)); 76 nloc = (Iend-Istart)/bs; 77 78 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij)); 79 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij)); 80 PetscCheck(isseqaij || ismpiaij,PETSC_COMM_WORLD,PETSC_ERR_USER,"Require (MPI)AIJ matrix type"); 81 PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GRAPH],0,0,0,0)); 82 83 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 84 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 85 implementation */ 86 PetscCall(MatViewFromOptions(Amat, NULL, "-g_mat_view")); 87 if (bs > 1 && (isseqaij || ((Mat_MPIAIJ*)Amat->data)->garray)) { 88 PetscInt *d_nnz, *o_nnz; 89 Mat a, b, c; 90 MatScalar *aa,val,AA[4096]; 91 PetscInt *aj,*ai,AJ[4096],nc; 92 PetscCall(PetscInfo(Amat,"New bs>1 PCGAMGCreateGraph. nloc=%D\n",nloc)); 93 if (isseqaij) { 94 a = Amat; b = NULL; 95 } 96 else { 97 Mat_MPIAIJ *d = (Mat_MPIAIJ*)Amat->data; 98 a = d->A; b = d->B; 99 } 100 PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz)); 101 for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){ 102 PetscInt *nnz = (c==a) ? d_nnz : o_nnz, nmax=0; 103 const PetscInt *cols; 104 for (PetscInt brow=0,jj,ok=1,j0; brow < nloc*bs; brow += bs) { // block rows 105 PetscCall(MatGetRow(c,brow,&jj,&cols,NULL)); 106 nnz[brow/bs] = jj/bs; 107 if (jj%bs) ok = 0; 108 if (cols) j0 = cols[0]; 109 else j0 = -1; 110 PetscCall(MatRestoreRow(c,brow,&jj,&cols,NULL)); 111 if (nnz[brow/bs]>nmax) nmax = nnz[brow/bs]; 112 for (PetscInt ii=1; ii < bs && nnz[brow/bs] ; ii++) { // check for non-dense blocks 113 PetscCall(MatGetRow(c,brow+ii,&jj,&cols,NULL)); 114 if (jj%bs) ok = 0; 115 if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0; 116 if (nnz[brow/bs] != jj/bs) ok = 0; 117 PetscCall(MatRestoreRow(c,brow+ii,&jj,&cols,NULL)); 118 } 119 if (!ok) { 120 PetscCall(PetscFree2(d_nnz,o_nnz)); 121 goto old_bs; 122 } 123 } 124 PetscCheck(nmax<4096,PETSC_COMM_SELF,PETSC_ERR_USER,"Buffer %D too small %D.",nmax,4096); 125 } 126 PetscCall(MatCreate(comm, &Gmat)); 127 PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE)); 128 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 129 PetscCall(MatSetType(Gmat, MATAIJ)); 130 PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz)); 131 PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz)); 132 PetscCall(PetscFree2(d_nnz,o_nnz)); 133 // diag 134 for (PetscInt brow=0,n,grow; brow < nloc*bs; brow += bs) { // block rows 135 Mat_SeqAIJ *aseq = (Mat_SeqAIJ*)a->data; 136 ai = aseq->i; 137 n = ai[brow+1] - ai[brow]; 138 aj = aseq->j + ai[brow]; 139 for (int k=0; k<n; k += bs) { // block columns 140 AJ[k/bs] = aj[k]/bs + Istart/bs; // diag starts at (Istart,Istart) 141 val = 0; 142 for (int ii=0; ii<bs; ii++) { // rows in block 143 aa = aseq->a + ai[brow+ii] + k; 144 for (int jj=0; jj<bs; jj++) { // columns in block 145 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 146 } 147 } 148 AA[k/bs] = val; 149 } 150 grow = Istart/bs + brow/bs; 151 PetscCall(MatSetValues(Gmat,1,&grow,n/bs,AJ,AA,INSERT_VALUES)); 152 } 153 // off-diag 154 if (ismpiaij) { 155 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)Amat->data; 156 const PetscScalar *vals; 157 const PetscInt *cols, *garray = aij->garray; 158 PetscCheck(garray,PETSC_COMM_SELF,PETSC_ERR_USER,"No garray ?"); 159 for (PetscInt brow=0,grow; brow < nloc*bs; brow += bs) { // block rows 160 PetscCall(MatGetRow(b,brow,&ncols,&cols,NULL)); 161 for (int k=0,cidx=0; k<ncols; k += bs,cidx++) { 162 AA[k/bs] = 0; 163 AJ[cidx] = garray[cols[k]]/bs; 164 } 165 nc = ncols/bs; 166 PetscCall(MatRestoreRow(b,brow,&ncols,&cols,NULL)); 167 for (int ii=0; ii<bs; ii++) { // rows in block 168 PetscCall(MatGetRow(b,brow+ii,&ncols,&cols,&vals)); 169 for (int k=0; k<ncols; k += bs) { 170 for (int jj=0; jj<bs; jj++) { // cols in block 171 AA[k/bs] += PetscAbs(PetscRealPart(vals[k+jj])); 172 } 173 } 174 PetscCall(MatRestoreRow(b,brow+ii,&ncols,&cols,&vals)); 175 } 176 grow = Istart/bs + brow/bs; 177 PetscCall(MatSetValues(Gmat,1,&grow,nc,AJ,AA,INSERT_VALUES)); 178 } 179 } 180 PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY)); 181 PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY)); 182 PetscCall(MatViewFromOptions(Gmat, NULL, "-g_mat_view")); 183 } else if (bs > 1) { 184 const PetscScalar *vals; 185 const PetscInt *idx; 186 PetscInt *d_nnz, *o_nnz,*w0,*w1,*w2; 187 188 old_bs: 189 /* 190 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 191 */ 192 193 PetscCall(PetscInfo(Amat,"OLD bs>1 PCGAMGCreateGraph\n")); 194 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij)); 195 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij)); 196 PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz)); 197 198 if (isseqaij) { 199 PetscInt max_d_nnz; 200 201 /* 202 Determine exact preallocation count for (sequential) scalar matrix 203 */ 204 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz)); 205 max_d_nnz = PetscMin(nloc,bs*max_d_nnz); 206 PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2)); 207 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) { 208 PetscCall(MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL)); 209 } 210 PetscCall(PetscFree3(w0,w1,w2)); 211 212 } else if (ismpiaij) { 213 Mat Daij,Oaij; 214 const PetscInt *garray; 215 PetscInt max_d_nnz; 216 217 PetscCall(MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray)); 218 219 /* 220 Determine exact preallocation count for diagonal block portion of scalar matrix 221 */ 222 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz)); 223 max_d_nnz = PetscMin(nloc,bs*max_d_nnz); 224 PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2)); 225 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 226 PetscCall(MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL)); 227 } 228 PetscCall(PetscFree3(w0,w1,w2)); 229 230 /* 231 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 232 */ 233 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 234 o_nnz[jj] = 0; 235 for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */ 236 PetscCall(MatGetRow(Oaij,Ii+kk,&ncols,NULL,NULL)); 237 o_nnz[jj] += ncols; 238 PetscCall(MatRestoreRow(Oaij,Ii+kk,&ncols,NULL,NULL)); 239 } 240 if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc; 241 } 242 243 } else SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_USER,"Require AIJ matrix type"); 244 245 /* get scalar copy (norms) of matrix */ 246 PetscCall(MatCreate(comm, &Gmat)); 247 PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE)); 248 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 249 PetscCall(MatSetType(Gmat, MATAIJ)); 250 PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz)); 251 PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz)); 252 PetscCall(PetscFree2(d_nnz,o_nnz)); 253 254 for (Ii = Istart; Ii < Iend; Ii++) { 255 PetscInt dest_row = Ii/bs; 256 PetscCall(MatGetRow(Amat,Ii,&ncols,&idx,&vals)); 257 for (jj=0; jj<ncols; jj++) { 258 PetscInt dest_col = idx[jj]/bs; 259 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 260 PetscCall(MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES)); 261 } 262 PetscCall(MatRestoreRow(Amat,Ii,&ncols,&idx,&vals)); 263 } 264 PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY)); 265 PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY)); 266 PetscCall(MatViewFromOptions(Gmat, NULL, "-g_mat_view")); 267 } else { 268 /* just copy scalar matrix - abs() not taken here but scaled later */ 269 PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 270 } 271 PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 272 273 PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GRAPH],0,0,0,0)); 274 275 *a_Gmat = Gmat; 276 PetscFunctionReturn(0); 277 } 278 279 /* -------------------------------------------------------------------------- */ 280 /*@C 281 PCGAMGFilterGraph - filter (remove zero and possibly small values from the) graph and make it symmetric if requested 282 283 Collective on Mat 284 285 Input Parameters: 286 + a_Gmat - the graph 287 . vfilter - threshold parameter [0,1) 288 - symm - make the result symmetric 289 290 Level: developer 291 292 Notes: 293 This is called before graph coarsers are called. 294 295 .seealso: PCGAMGSetThreshold() 296 @*/ 297 PetscErrorCode PCGAMGFilterGraph(Mat *a_Gmat,PetscReal vfilter,PetscBool symm) 298 { 299 PetscInt Istart,Iend,Ii,jj,ncols,nnz0,nnz1, NN, MM, nloc; 300 PetscMPIInt rank; 301 Mat Gmat = *a_Gmat, tGmat; 302 MPI_Comm comm; 303 const PetscScalar *vals; 304 const PetscInt *idx; 305 PetscInt *d_nnz, *o_nnz; 306 Vec diag; 307 308 PetscFunctionBegin; 309 PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[SET16],0,0,0,0)); 310 311 /* TODO GPU: optimization proposal, each class provides fast implementation of this 312 procedure via MatAbs API */ 313 if (vfilter < 0.0 && !symm) { 314 /* Just use the provided matrix as the graph but make all values positive */ 315 MatInfo info; 316 PetscScalar *avals; 317 PetscBool isaij,ismpiaij; 318 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATSEQAIJ,&isaij)); 319 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATMPIAIJ,&ismpiaij)); 320 PetscCheck(isaij || ismpiaij,PETSC_COMM_WORLD,PETSC_ERR_USER,"Require (MPI)AIJ matrix type"); 321 if (isaij) { 322 PetscCall(MatGetInfo(Gmat,MAT_LOCAL,&info)); 323 PetscCall(MatSeqAIJGetArray(Gmat,&avals)); 324 for (jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 325 PetscCall(MatSeqAIJRestoreArray(Gmat,&avals)); 326 } else { 327 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)Gmat->data; 328 PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info)); 329 PetscCall(MatSeqAIJGetArray(aij->A,&avals)); 330 for (jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 331 PetscCall(MatSeqAIJRestoreArray(aij->A,&avals)); 332 PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info)); 333 PetscCall(MatSeqAIJGetArray(aij->B,&avals)); 334 for (jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 335 PetscCall(MatSeqAIJRestoreArray(aij->B,&avals)); 336 } 337 PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[SET16],0,0,0,0)); 338 PetscFunctionReturn(0); 339 } 340 341 /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold? 342 Also, if the matrix is symmetric, can we skip this 343 operation? It can be very expensive on large matrices. */ 344 PetscCall(PetscObjectGetComm((PetscObject)Gmat,&comm)); 345 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 346 PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend)); 347 nloc = Iend - Istart; 348 PetscCall(MatGetSize(Gmat, &MM, &NN)); 349 350 if (symm) { 351 Mat matTrans; 352 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 353 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 354 PetscCall(MatDestroy(&matTrans)); 355 } 356 357 /* scale Gmat for all values between -1 and 1 */ 358 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 359 PetscCall(MatGetDiagonal(Gmat, diag)); 360 PetscCall(VecReciprocal(diag)); 361 PetscCall(VecSqrtAbs(diag)); 362 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 363 PetscCall(VecDestroy(&diag)); 364 365 /* Determine upper bound on nonzeros needed in new filtered matrix */ 366 PetscCall(PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz)); 367 for (Ii = Istart, jj = 0; Ii < Iend; Ii++, jj++) { 368 PetscCall(MatGetRow(Gmat,Ii,&ncols,NULL,NULL)); 369 d_nnz[jj] = ncols; 370 o_nnz[jj] = ncols; 371 PetscCall(MatRestoreRow(Gmat,Ii,&ncols,NULL,NULL)); 372 if (d_nnz[jj] > nloc) d_nnz[jj] = nloc; 373 if (o_nnz[jj] > (MM-nloc)) o_nnz[jj] = MM - nloc; 374 } 375 PetscCall(MatCreate(comm, &tGmat)); 376 PetscCall(MatSetSizes(tGmat,nloc,nloc,MM,MM)); 377 PetscCall(MatSetBlockSizes(tGmat, 1, 1)); 378 PetscCall(MatSetType(tGmat, MATAIJ)); 379 PetscCall(MatSeqAIJSetPreallocation(tGmat,0,d_nnz)); 380 PetscCall(MatMPIAIJSetPreallocation(tGmat,0,d_nnz,0,o_nnz)); 381 PetscCall(MatSetOption(tGmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 382 PetscCall(PetscFree2(d_nnz,o_nnz)); 383 384 for (Ii = Istart, nnz0 = nnz1 = 0; Ii < Iend; Ii++) { 385 PetscCall(MatGetRow(Gmat,Ii,&ncols,&idx,&vals)); 386 for (jj=0; jj<ncols; jj++,nnz0++) { 387 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 388 if (PetscRealPart(sv) > vfilter) { 389 nnz1++; 390 PetscCall(MatSetValues(tGmat,1,&Ii,1,&idx[jj],&sv,INSERT_VALUES)); 391 } 392 } 393 PetscCall(MatRestoreRow(Gmat,Ii,&ncols,&idx,&vals)); 394 } 395 PetscCall(MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY)); 396 PetscCall(MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY)); 397 if (symm) { 398 PetscCall(MatSetOption(tGmat,MAT_SYMMETRIC,PETSC_TRUE)); 399 } else { 400 PetscCall(MatPropagateSymmetryOptions(Gmat,tGmat)); 401 } 402 PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[SET16],0,0,0,0)); 403 404 #if defined(PETSC_USE_INFO) 405 { 406 double t1 = (!nnz0) ? 1. : 100.*(double)nnz1/(double)nnz0, t2 = (!nloc) ? 1. : (double)nnz0/(double)nloc; 407 PetscCall(PetscInfo(*a_Gmat,"\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%D)\n",t1,vfilter,t2,MM)); 408 } 409 #endif 410 PetscCall(MatDestroy(&Gmat)); 411 *a_Gmat = tGmat; 412 PetscFunctionReturn(0); 413 } 414 415 /* -------------------------------------------------------------------------- */ 416 /* 417 PCGAMGGetDataWithGhosts - hacks into Mat MPIAIJ so this must have size > 1 418 419 Input Parameter: 420 . Gmat - MPIAIJ matrix for scattters 421 . data_sz - number of data terms per node (# cols in output) 422 . data_in[nloc*data_sz] - column oriented data 423 Output Parameter: 424 . a_stride - numbrt of rows of output 425 . a_data_out[stride*data_sz] - output data with ghosts 426 */ 427 PetscErrorCode PCGAMGGetDataWithGhosts(Mat Gmat,PetscInt data_sz,PetscReal data_in[],PetscInt *a_stride,PetscReal **a_data_out) 428 { 429 Vec tmp_crds; 430 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ*)Gmat->data; 431 PetscInt nnodes,num_ghosts,dir,kk,jj,my0,Iend,nloc; 432 PetscScalar *data_arr; 433 PetscReal *datas; 434 PetscBool isMPIAIJ; 435 436 PetscFunctionBegin; 437 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATMPIAIJ, &isMPIAIJ)); 438 PetscCall(MatGetOwnershipRange(Gmat, &my0, &Iend)); 439 nloc = Iend - my0; 440 PetscCall(VecGetLocalSize(mpimat->lvec, &num_ghosts)); 441 nnodes = num_ghosts + nloc; 442 *a_stride = nnodes; 443 PetscCall(MatCreateVecs(Gmat, &tmp_crds, NULL)); 444 445 PetscCall(PetscMalloc1(data_sz*nnodes, &datas)); 446 for (dir=0; dir<data_sz; dir++) { 447 /* set local, and global */ 448 for (kk=0; kk<nloc; kk++) { 449 PetscInt gid = my0 + kk; 450 PetscScalar crd = (PetscScalar)data_in[dir*nloc + kk]; /* col oriented */ 451 datas[dir*nnodes + kk] = PetscRealPart(crd); 452 453 PetscCall(VecSetValues(tmp_crds, 1, &gid, &crd, INSERT_VALUES)); 454 } 455 PetscCall(VecAssemblyBegin(tmp_crds)); 456 PetscCall(VecAssemblyEnd(tmp_crds)); 457 /* get ghost datas */ 458 PetscCall(VecScatterBegin(mpimat->Mvctx,tmp_crds,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 459 PetscCall(VecScatterEnd(mpimat->Mvctx,tmp_crds,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 460 PetscCall(VecGetArray(mpimat->lvec, &data_arr)); 461 for (kk=nloc,jj=0;jj<num_ghosts;kk++,jj++) datas[dir*nnodes + kk] = PetscRealPart(data_arr[jj]); 462 PetscCall(VecRestoreArray(mpimat->lvec, &data_arr)); 463 } 464 PetscCall(VecDestroy(&tmp_crds)); 465 *a_data_out = datas; 466 PetscFunctionReturn(0); 467 } 468 469 PetscErrorCode PCGAMGHashTableCreate(PetscInt a_size, PCGAMGHashTable *a_tab) 470 { 471 PetscInt kk; 472 473 PetscFunctionBegin; 474 a_tab->size = a_size; 475 PetscCall(PetscMalloc2(a_size, &a_tab->table,a_size, &a_tab->data)); 476 for (kk=0; kk<a_size; kk++) a_tab->table[kk] = -1; 477 PetscFunctionReturn(0); 478 } 479 480 PetscErrorCode PCGAMGHashTableDestroy(PCGAMGHashTable *a_tab) 481 { 482 PetscFunctionBegin; 483 PetscCall(PetscFree2(a_tab->table,a_tab->data)); 484 PetscFunctionReturn(0); 485 } 486 487 PetscErrorCode PCGAMGHashTableAdd(PCGAMGHashTable *a_tab, PetscInt a_key, PetscInt a_data) 488 { 489 PetscInt kk,idx; 490 491 PetscFunctionBegin; 492 PetscCheckFalse(a_key<0,PETSC_COMM_SELF,PETSC_ERR_USER,"Negative key %D.",a_key); 493 for (kk = 0, idx = GAMG_HASH(a_key); kk < a_tab->size; kk++, idx = (idx==(a_tab->size-1)) ? 0 : idx + 1) { 494 if (a_tab->table[idx] == a_key) { 495 /* exists */ 496 a_tab->data[idx] = a_data; 497 break; 498 } else if (a_tab->table[idx] == -1) { 499 /* add */ 500 a_tab->table[idx] = a_key; 501 a_tab->data[idx] = a_data; 502 break; 503 } 504 } 505 if (kk==a_tab->size) { 506 /* this is not to efficient, waiting until completely full */ 507 PetscInt oldsize = a_tab->size, new_size = 2*a_tab->size + 5, *oldtable = a_tab->table, *olddata = a_tab->data; 508 509 a_tab->size = new_size; 510 PetscCall(PetscMalloc2(a_tab->size, &a_tab->table,a_tab->size, &a_tab->data)); 511 for (kk=0;kk<a_tab->size;kk++) a_tab->table[kk] = -1; 512 for (kk=0;kk<oldsize;kk++) { 513 if (oldtable[kk] != -1) { 514 PetscCall(PCGAMGHashTableAdd(a_tab, oldtable[kk], olddata[kk])); 515 } 516 } 517 PetscCall(PetscFree2(oldtable,olddata)); 518 PetscCall(PCGAMGHashTableAdd(a_tab, a_key, a_data)); 519 } 520 PetscFunctionReturn(0); 521 } 522