1 /* 2 GAMG geometric-algebric multigrid PC - Mark Adams 2011 3 */ 4 #include <petsc/private/matimpl.h> 5 #include <../src/ksp/pc/impls/gamg/gamg.h> /*I "petscpc.h" I*/ 6 #include <petsc/private/kspimpl.h> 7 8 /* 9 Produces a set of block column indices of the matrix row, one for each block represented in the original row 10 11 n - the number of block indices in cc[] 12 cc - the block indices (must be large enough to contain the indices) 13 */ 14 static inline PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc) 15 { 16 PetscInt cnt = -1,nidx,j; 17 const PetscInt *idx; 18 PetscErrorCode ierr; 19 20 PetscFunctionBegin; 21 ierr = MatGetRow(Amat,row,&nidx,&idx,NULL);CHKERRQ(ierr); 22 if (nidx) { 23 cnt = 0; 24 cc[cnt] = idx[0]/bs; 25 for (j=1; j<nidx; j++) { 26 if (cc[cnt] < idx[j]/bs) cc[++cnt] = idx[j]/bs; 27 } 28 } 29 ierr = MatRestoreRow(Amat,row,&nidx,&idx,NULL);CHKERRQ(ierr); 30 *n = cnt+1; 31 PetscFunctionReturn(0); 32 } 33 34 /* 35 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 36 37 ncollapsed - the number of block indices 38 collapsed - the block indices (must be large enough to contain the indices) 39 */ 40 static inline PetscErrorCode MatCollapseRows(Mat Amat,PetscInt start,PetscInt bs,PetscInt *w0,PetscInt *w1,PetscInt *w2,PetscInt *ncollapsed,PetscInt **collapsed) 41 { 42 PetscInt i,nprev,*cprev = w0,ncur = 0,*ccur = w1,*merged = w2,*cprevtmp; 43 PetscErrorCode ierr; 44 45 PetscFunctionBegin; 46 ierr = MatCollapseRow(Amat,start,bs,&nprev,cprev);CHKERRQ(ierr); 47 for (i=start+1; i<start+bs; i++) { 48 ierr = MatCollapseRow(Amat,i,bs,&ncur,ccur);CHKERRQ(ierr); 49 ierr = PetscMergeIntArray(nprev,cprev,ncur,ccur,&nprev,&merged);CHKERRQ(ierr); 50 cprevtmp = cprev; cprev = merged; merged = cprevtmp; 51 } 52 *ncollapsed = nprev; 53 if (collapsed) *collapsed = cprev; 54 PetscFunctionReturn(0); 55 } 56 57 /* -------------------------------------------------------------------------- */ 58 /* 59 PCGAMGCreateGraph - create simple scaled scalar graph from matrix 60 61 Input Parameter: 62 . Amat - matrix 63 Output Parameter: 64 . a_Gmaat - eoutput scalar graph (symmetric?) 65 */ 66 PetscErrorCode PCGAMGCreateGraph(Mat Amat, Mat *a_Gmat) 67 { 68 PetscErrorCode ierr; 69 PetscInt Istart,Iend,Ii,jj,kk,ncols,nloc,NN,MM,bs; 70 MPI_Comm comm; 71 Mat Gmat; 72 PetscBool ismpiaij,isseqaij; 73 74 PetscFunctionBegin; 75 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 76 ierr = MatGetOwnershipRange(Amat, &Istart, &Iend);CHKERRQ(ierr); 77 ierr = MatGetSize(Amat, &MM, &NN);CHKERRQ(ierr); 78 ierr = MatGetBlockSize(Amat, &bs);CHKERRQ(ierr); 79 nloc = (Iend-Istart)/bs; 80 81 ierr = PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 82 ierr = PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij);CHKERRQ(ierr); 83 PetscCheckFalse(!isseqaij && !ismpiaij,PETSC_COMM_WORLD,PETSC_ERR_USER,"Require (MPI)AIJ matrix type"); 84 ierr = PetscLogEventBegin(petsc_gamg_setup_events[GRAPH],0,0,0,0);CHKERRQ(ierr); 85 86 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 87 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 88 implementation */ 89 ierr = MatViewFromOptions(Amat, NULL, "-g_mat_view");CHKERRQ(ierr); 90 if (bs > 1 && (isseqaij || ((Mat_MPIAIJ*)Amat->data)->garray)) { 91 PetscInt *d_nnz, *o_nnz; 92 Mat a, b, c; 93 MatScalar *aa,val,AA[4096]; 94 PetscInt *aj,*ai,AJ[4096],nc; 95 ierr = PetscInfo(Amat,"New bs>1 PCGAMGCreateGraph. nloc=%D\n",nloc);CHKERRQ(ierr); 96 if (isseqaij) { 97 a = Amat; b = NULL; 98 } 99 else { 100 Mat_MPIAIJ *d = (Mat_MPIAIJ*)Amat->data; 101 a = d->A; b = d->B; 102 } 103 ierr = PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz);CHKERRQ(ierr); 104 for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){ 105 PetscInt *nnz = (c==a) ? d_nnz : o_nnz, nmax=0; 106 const PetscInt *cols; 107 for (PetscInt brow=0,jj,ok=1,j0; brow < nloc*bs; brow += bs) { // block rows 108 ierr = MatGetRow(c,brow,&jj,&cols,NULL);CHKERRQ(ierr); 109 nnz[brow/bs] = jj/bs; 110 if (jj%bs) ok = 0; 111 if (cols) j0 = cols[0]; 112 else j0 = -1; 113 ierr = MatRestoreRow(c,brow,&jj,&cols,NULL);CHKERRQ(ierr); 114 if (nnz[brow/bs]>nmax) nmax = nnz[brow/bs]; 115 for (PetscInt ii=1; ii < bs && nnz[brow/bs] ; ii++) { // check for non-dense blocks 116 ierr = MatGetRow(c,brow+ii,&jj,&cols,NULL);CHKERRQ(ierr); 117 if (jj%bs) ok = 0; 118 if (j0 != cols[0]) ok = 0; 119 if (nnz[brow/bs] != jj/bs) ok = 0; 120 ierr = MatRestoreRow(c,brow+11,&jj,&cols,NULL);CHKERRQ(ierr); 121 } 122 if (!ok) { 123 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 124 goto old_bs; 125 } 126 } 127 PetscCheck(nmax<4096,PETSC_COMM_SELF,PETSC_ERR_USER,"Buffer %D too small %D.",nmax,4096); 128 } 129 ierr = MatCreate(comm, &Gmat);CHKERRQ(ierr); 130 ierr = MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 131 ierr = MatSetBlockSizes(Gmat, 1, 1);CHKERRQ(ierr); 132 ierr = MatSetType(Gmat, MATAIJ);CHKERRQ(ierr); 133 ierr = MatSeqAIJSetPreallocation(Gmat,0,d_nnz);CHKERRQ(ierr); 134 ierr = MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 135 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 136 // diag 137 for (PetscInt brow=0,n,grow; brow < nloc*bs; brow += bs) { // block rows 138 Mat_SeqAIJ *aseq = (Mat_SeqAIJ*)a->data; 139 ai = aseq->i; 140 n = ai[brow+1] - ai[brow]; 141 aj = aseq->j + ai[brow]; 142 for (int k=0; k<n; k += bs) { // block columns 143 AJ[k/bs] = aj[k]/bs + Istart/bs; // diag starts at (Istart,Istart) 144 val = 0; 145 for (int ii=0; ii<bs; ii++) { // rows in block 146 aa = aseq->a + ai[brow+ii] + k; 147 for (int jj=0; jj<bs; jj++) { // columns in block 148 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 149 } 150 } 151 AA[k/bs] = val; 152 } 153 grow = Istart/bs + brow/bs; 154 ierr = MatSetValues(Gmat,1,&grow,n/bs,AJ,AA,INSERT_VALUES);CHKERRQ(ierr); 155 } 156 // off-diag 157 if (ismpiaij) { 158 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)Amat->data; 159 const PetscScalar *vals; 160 const PetscInt *cols, *garray = aij->garray; 161 PetscCheck(garray,PETSC_COMM_SELF,PETSC_ERR_USER,"No garray ?"); 162 for (PetscInt brow=0,grow; brow < nloc*bs; brow += bs) { // block rows 163 ierr = MatGetRow(b,brow,&ncols,&cols,NULL);CHKERRQ(ierr); 164 for (int k=0,cidx=0; k<ncols; k += bs,cidx++) { 165 AA[k/bs] = 0; 166 AJ[cidx] = garray[cols[k]]/bs; 167 } 168 nc = ncols/bs; 169 ierr = MatRestoreRow(b,brow,&ncols,&cols,NULL);CHKERRQ(ierr); 170 for (int ii=0; ii<bs; ii++) { // rows in block 171 ierr = MatGetRow(b,brow+ii,&ncols,&cols,&vals);CHKERRQ(ierr); 172 for (int k=0; k<ncols; k += bs) { 173 for (int jj=0; jj<bs; jj++) { // cols in block 174 AA[k/bs] += PetscAbs(PetscRealPart(vals[k+jj])); 175 } 176 } 177 ierr = MatRestoreRow(b,brow+ii,&ncols,&cols,&vals);CHKERRQ(ierr); 178 } 179 grow = Istart/bs + brow/bs; 180 ierr = MatSetValues(Gmat,1,&grow,nc,AJ,AA,INSERT_VALUES);CHKERRQ(ierr); 181 } 182 } 183 ierr = MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 184 ierr = MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 185 ierr = MatViewFromOptions(Gmat, NULL, "-g_mat_view");CHKERRQ(ierr); 186 } else if (bs > 1) { 187 const PetscScalar *vals; 188 const PetscInt *idx; 189 PetscInt *d_nnz, *o_nnz,*w0,*w1,*w2; 190 191 old_bs: 192 /* 193 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 194 */ 195 196 ierr = PetscInfo(Amat,"OLD bs>1 PCGAMGCreateGraph\n");CHKERRQ(ierr); 197 ierr = PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 198 ierr = PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij);CHKERRQ(ierr); 199 ierr = PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz);CHKERRQ(ierr); 200 201 if (isseqaij) { 202 PetscInt max_d_nnz; 203 204 /* 205 Determine exact preallocation count for (sequential) scalar matrix 206 */ 207 ierr = MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz);CHKERRQ(ierr); 208 max_d_nnz = PetscMin(nloc,bs*max_d_nnz); 209 ierr = PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2);CHKERRQ(ierr); 210 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) { 211 ierr = MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL);CHKERRQ(ierr); 212 } 213 ierr = PetscFree3(w0,w1,w2);CHKERRQ(ierr); 214 215 } else if (ismpiaij) { 216 Mat Daij,Oaij; 217 const PetscInt *garray; 218 PetscInt max_d_nnz; 219 220 ierr = MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray);CHKERRQ(ierr); 221 222 /* 223 Determine exact preallocation count for diagonal block portion of scalar matrix 224 */ 225 ierr = MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz);CHKERRQ(ierr); 226 max_d_nnz = PetscMin(nloc,bs*max_d_nnz); 227 ierr = PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2);CHKERRQ(ierr); 228 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 229 ierr = MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL);CHKERRQ(ierr); 230 } 231 ierr = PetscFree3(w0,w1,w2);CHKERRQ(ierr); 232 233 /* 234 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 235 */ 236 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 237 o_nnz[jj] = 0; 238 for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */ 239 ierr = MatGetRow(Oaij,Ii+kk,&ncols,NULL,NULL);CHKERRQ(ierr); 240 o_nnz[jj] += ncols; 241 ierr = MatRestoreRow(Oaij,Ii+kk,&ncols,NULL,NULL);CHKERRQ(ierr); 242 } 243 if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc; 244 } 245 246 } else SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_USER,"Require AIJ matrix type"); 247 248 /* get scalar copy (norms) of matrix */ 249 ierr = MatCreate(comm, &Gmat);CHKERRQ(ierr); 250 ierr = MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 251 ierr = MatSetBlockSizes(Gmat, 1, 1);CHKERRQ(ierr); 252 ierr = MatSetType(Gmat, MATAIJ);CHKERRQ(ierr); 253 ierr = MatSeqAIJSetPreallocation(Gmat,0,d_nnz);CHKERRQ(ierr); 254 ierr = MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 255 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 256 257 for (Ii = Istart; Ii < Iend; Ii++) { 258 PetscInt dest_row = Ii/bs; 259 ierr = MatGetRow(Amat,Ii,&ncols,&idx,&vals);CHKERRQ(ierr); 260 for (jj=0; jj<ncols; jj++) { 261 PetscInt dest_col = idx[jj]/bs; 262 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 263 ierr = MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES);CHKERRQ(ierr); 264 } 265 ierr = MatRestoreRow(Amat,Ii,&ncols,&idx,&vals);CHKERRQ(ierr); 266 } 267 ierr = MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 268 ierr = MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 269 ierr = MatViewFromOptions(Gmat, NULL, "-g_mat_view");CHKERRQ(ierr); 270 } else { 271 /* just copy scalar matrix - abs() not taken here but scaled later */ 272 ierr = MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat);CHKERRQ(ierr); 273 } 274 ierr = MatPropagateSymmetryOptions(Amat, Gmat);CHKERRQ(ierr); 275 276 ierr = PetscLogEventEnd(petsc_gamg_setup_events[GRAPH],0,0,0,0);CHKERRQ(ierr); 277 278 *a_Gmat = Gmat; 279 PetscFunctionReturn(0); 280 } 281 282 /* -------------------------------------------------------------------------- */ 283 /*@C 284 PCGAMGFilterGraph - filter (remove zero and possibly small values from the) graph and make it symmetric if requested 285 286 Collective on Mat 287 288 Input Parameters: 289 + a_Gmat - the graph 290 . vfilter - threshold parameter [0,1) 291 - symm - make the result symmetric 292 293 Level: developer 294 295 Notes: 296 This is called before graph coarsers are called. 297 298 .seealso: PCGAMGSetThreshold() 299 @*/ 300 PetscErrorCode PCGAMGFilterGraph(Mat *a_Gmat,PetscReal vfilter,PetscBool symm) 301 { 302 PetscErrorCode ierr; 303 PetscInt Istart,Iend,Ii,jj,ncols,nnz0,nnz1, NN, MM, nloc; 304 PetscMPIInt rank; 305 Mat Gmat = *a_Gmat, tGmat; 306 MPI_Comm comm; 307 const PetscScalar *vals; 308 const PetscInt *idx; 309 PetscInt *d_nnz, *o_nnz; 310 Vec diag; 311 312 PetscFunctionBegin; 313 ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET16],0,0,0,0);CHKERRQ(ierr); 314 315 /* TODO GPU: optimization proposal, each class provides fast implementation of this 316 procedure via MatAbs API */ 317 if (vfilter < 0.0 && !symm) { 318 /* Just use the provided matrix as the graph but make all values positive */ 319 MatInfo info; 320 PetscScalar *avals; 321 PetscBool isaij,ismpiaij; 322 ierr = PetscObjectBaseTypeCompare((PetscObject)Gmat,MATSEQAIJ,&isaij);CHKERRQ(ierr); 323 ierr = PetscObjectBaseTypeCompare((PetscObject)Gmat,MATMPIAIJ,&ismpiaij);CHKERRQ(ierr); 324 PetscCheckFalse(!isaij && !ismpiaij,PETSC_COMM_WORLD,PETSC_ERR_USER,"Require (MPI)AIJ matrix type"); 325 if (isaij) { 326 ierr = MatGetInfo(Gmat,MAT_LOCAL,&info);CHKERRQ(ierr); 327 ierr = MatSeqAIJGetArray(Gmat,&avals);CHKERRQ(ierr); 328 for (jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 329 ierr = MatSeqAIJRestoreArray(Gmat,&avals);CHKERRQ(ierr); 330 } else { 331 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)Gmat->data; 332 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 333 ierr = MatSeqAIJGetArray(aij->A,&avals);CHKERRQ(ierr); 334 for (jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 335 ierr = MatSeqAIJRestoreArray(aij->A,&avals);CHKERRQ(ierr); 336 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 337 ierr = MatSeqAIJGetArray(aij->B,&avals);CHKERRQ(ierr); 338 for (jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 339 ierr = MatSeqAIJRestoreArray(aij->B,&avals);CHKERRQ(ierr); 340 } 341 ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET16],0,0,0,0);CHKERRQ(ierr); 342 PetscFunctionReturn(0); 343 } 344 345 /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold? 346 Also, if the matrix is symmetric, can we skip this 347 operation? It can be very expensive on large matrices. */ 348 ierr = PetscObjectGetComm((PetscObject)Gmat,&comm);CHKERRQ(ierr); 349 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 350 ierr = MatGetOwnershipRange(Gmat, &Istart, &Iend);CHKERRQ(ierr); 351 nloc = Iend - Istart; 352 ierr = MatGetSize(Gmat, &MM, &NN);CHKERRQ(ierr); 353 354 if (symm) { 355 Mat matTrans; 356 ierr = MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans);CHKERRQ(ierr); 357 ierr = MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 358 ierr = MatDestroy(&matTrans);CHKERRQ(ierr); 359 } 360 361 /* scale Gmat for all values between -1 and 1 */ 362 ierr = MatCreateVecs(Gmat, &diag, NULL);CHKERRQ(ierr); 363 ierr = MatGetDiagonal(Gmat, diag);CHKERRQ(ierr); 364 ierr = VecReciprocal(diag);CHKERRQ(ierr); 365 ierr = VecSqrtAbs(diag);CHKERRQ(ierr); 366 ierr = MatDiagonalScale(Gmat, diag, diag);CHKERRQ(ierr); 367 ierr = VecDestroy(&diag);CHKERRQ(ierr); 368 369 /* Determine upper bound on nonzeros needed in new filtered matrix */ 370 ierr = PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz);CHKERRQ(ierr); 371 for (Ii = Istart, jj = 0; Ii < Iend; Ii++, jj++) { 372 ierr = MatGetRow(Gmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr); 373 d_nnz[jj] = ncols; 374 o_nnz[jj] = ncols; 375 ierr = MatRestoreRow(Gmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr); 376 if (d_nnz[jj] > nloc) d_nnz[jj] = nloc; 377 if (o_nnz[jj] > (MM-nloc)) o_nnz[jj] = MM - nloc; 378 } 379 ierr = MatCreate(comm, &tGmat);CHKERRQ(ierr); 380 ierr = MatSetSizes(tGmat,nloc,nloc,MM,MM);CHKERRQ(ierr); 381 ierr = MatSetBlockSizes(tGmat, 1, 1);CHKERRQ(ierr); 382 ierr = MatSetType(tGmat, MATAIJ);CHKERRQ(ierr); 383 ierr = MatSeqAIJSetPreallocation(tGmat,0,d_nnz);CHKERRQ(ierr); 384 ierr = MatMPIAIJSetPreallocation(tGmat,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 385 ierr = MatSetOption(tGmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 386 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 387 388 for (Ii = Istart, nnz0 = nnz1 = 0; Ii < Iend; Ii++) { 389 ierr = MatGetRow(Gmat,Ii,&ncols,&idx,&vals);CHKERRQ(ierr); 390 for (jj=0; jj<ncols; jj++,nnz0++) { 391 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 392 if (PetscRealPart(sv) > vfilter) { 393 nnz1++; 394 ierr = MatSetValues(tGmat,1,&Ii,1,&idx[jj],&sv,INSERT_VALUES);CHKERRQ(ierr); 395 } 396 } 397 ierr = MatRestoreRow(Gmat,Ii,&ncols,&idx,&vals);CHKERRQ(ierr); 398 } 399 ierr = MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 400 ierr = MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 401 if (symm) { 402 ierr = MatSetOption(tGmat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 403 } else { 404 ierr = MatPropagateSymmetryOptions(Gmat,tGmat);CHKERRQ(ierr); 405 } 406 ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET16],0,0,0,0);CHKERRQ(ierr); 407 408 #if defined(PETSC_USE_INFO) 409 { 410 double t1 = (!nnz0) ? 1. : 100.*(double)nnz1/(double)nnz0, t2 = (!nloc) ? 1. : (double)nnz0/(double)nloc; 411 ierr = PetscInfo(*a_Gmat,"\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%D)\n",t1,vfilter,t2,MM);CHKERRQ(ierr); 412 } 413 #endif 414 ierr = MatDestroy(&Gmat);CHKERRQ(ierr); 415 *a_Gmat = tGmat; 416 PetscFunctionReturn(0); 417 } 418 419 /* -------------------------------------------------------------------------- */ 420 /* 421 PCGAMGGetDataWithGhosts - hacks into Mat MPIAIJ so this must have size > 1 422 423 Input Parameter: 424 . Gmat - MPIAIJ matrix for scattters 425 . data_sz - number of data terms per node (# cols in output) 426 . data_in[nloc*data_sz] - column oriented data 427 Output Parameter: 428 . a_stride - numbrt of rows of output 429 . a_data_out[stride*data_sz] - output data with ghosts 430 */ 431 PetscErrorCode PCGAMGGetDataWithGhosts(Mat Gmat,PetscInt data_sz,PetscReal data_in[],PetscInt *a_stride,PetscReal **a_data_out) 432 { 433 PetscErrorCode ierr; 434 Vec tmp_crds; 435 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ*)Gmat->data; 436 PetscInt nnodes,num_ghosts,dir,kk,jj,my0,Iend,nloc; 437 PetscScalar *data_arr; 438 PetscReal *datas; 439 PetscBool isMPIAIJ; 440 441 PetscFunctionBegin; 442 ierr = PetscObjectBaseTypeCompare((PetscObject)Gmat, MATMPIAIJ, &isMPIAIJ);CHKERRQ(ierr); 443 ierr = MatGetOwnershipRange(Gmat, &my0, &Iend);CHKERRQ(ierr); 444 nloc = Iend - my0; 445 ierr = VecGetLocalSize(mpimat->lvec, &num_ghosts);CHKERRQ(ierr); 446 nnodes = num_ghosts + nloc; 447 *a_stride = nnodes; 448 ierr = MatCreateVecs(Gmat, &tmp_crds, NULL);CHKERRQ(ierr); 449 450 ierr = PetscMalloc1(data_sz*nnodes, &datas);CHKERRQ(ierr); 451 for (dir=0; dir<data_sz; dir++) { 452 /* set local, and global */ 453 for (kk=0; kk<nloc; kk++) { 454 PetscInt gid = my0 + kk; 455 PetscScalar crd = (PetscScalar)data_in[dir*nloc + kk]; /* col oriented */ 456 datas[dir*nnodes + kk] = PetscRealPart(crd); 457 458 ierr = VecSetValues(tmp_crds, 1, &gid, &crd, INSERT_VALUES);CHKERRQ(ierr); 459 } 460 ierr = VecAssemblyBegin(tmp_crds);CHKERRQ(ierr); 461 ierr = VecAssemblyEnd(tmp_crds);CHKERRQ(ierr); 462 /* get ghost datas */ 463 ierr = VecScatterBegin(mpimat->Mvctx,tmp_crds,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 464 ierr = VecScatterEnd(mpimat->Mvctx,tmp_crds,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 465 ierr = VecGetArray(mpimat->lvec, &data_arr);CHKERRQ(ierr); 466 for (kk=nloc,jj=0;jj<num_ghosts;kk++,jj++) datas[dir*nnodes + kk] = PetscRealPart(data_arr[jj]); 467 ierr = VecRestoreArray(mpimat->lvec, &data_arr);CHKERRQ(ierr); 468 } 469 ierr = VecDestroy(&tmp_crds);CHKERRQ(ierr); 470 *a_data_out = datas; 471 PetscFunctionReturn(0); 472 } 473 474 PetscErrorCode PCGAMGHashTableCreate(PetscInt a_size, PCGAMGHashTable *a_tab) 475 { 476 PetscErrorCode ierr; 477 PetscInt kk; 478 479 PetscFunctionBegin; 480 a_tab->size = a_size; 481 ierr = PetscMalloc2(a_size, &a_tab->table,a_size, &a_tab->data);CHKERRQ(ierr); 482 for (kk=0; kk<a_size; kk++) a_tab->table[kk] = -1; 483 PetscFunctionReturn(0); 484 } 485 486 PetscErrorCode PCGAMGHashTableDestroy(PCGAMGHashTable *a_tab) 487 { 488 PetscErrorCode ierr; 489 490 PetscFunctionBegin; 491 ierr = PetscFree2(a_tab->table,a_tab->data);CHKERRQ(ierr); 492 PetscFunctionReturn(0); 493 } 494 495 PetscErrorCode PCGAMGHashTableAdd(PCGAMGHashTable *a_tab, PetscInt a_key, PetscInt a_data) 496 { 497 PetscInt kk,idx; 498 499 PetscFunctionBegin; 500 PetscCheckFalse(a_key<0,PETSC_COMM_SELF,PETSC_ERR_USER,"Negative key %D.",a_key); 501 for (kk = 0, idx = GAMG_HASH(a_key); kk < a_tab->size; kk++, idx = (idx==(a_tab->size-1)) ? 0 : idx + 1) { 502 if (a_tab->table[idx] == a_key) { 503 /* exists */ 504 a_tab->data[idx] = a_data; 505 break; 506 } else if (a_tab->table[idx] == -1) { 507 /* add */ 508 a_tab->table[idx] = a_key; 509 a_tab->data[idx] = a_data; 510 break; 511 } 512 } 513 if (kk==a_tab->size) { 514 /* this is not to efficient, waiting until completely full */ 515 PetscInt oldsize = a_tab->size, new_size = 2*a_tab->size + 5, *oldtable = a_tab->table, *olddata = a_tab->data; 516 PetscErrorCode ierr; 517 518 a_tab->size = new_size; 519 ierr = PetscMalloc2(a_tab->size, &a_tab->table,a_tab->size, &a_tab->data);CHKERRQ(ierr); 520 for (kk=0;kk<a_tab->size;kk++) a_tab->table[kk] = -1; 521 for (kk=0;kk<oldsize;kk++) { 522 if (oldtable[kk] != -1) { 523 ierr = PCGAMGHashTableAdd(a_tab, oldtable[kk], olddata[kk]);CHKERRQ(ierr); 524 } 525 } 526 ierr = PetscFree2(oldtable,olddata);CHKERRQ(ierr); 527 ierr = PCGAMGHashTableAdd(a_tab, a_key, a_data);CHKERRQ(ierr); 528 } 529 PetscFunctionReturn(0); 530 } 531