/* GAMG geometric-algebric multigrid PC - Mark Adams 2011 */ #include #include <../src/ksp/pc/impls/gamg/gamg.h> /*I "petscpc.h" I*/ #include /* Produces a set of block column indices of the matrix row, one for each block represented in the original row n - the number of block indices in cc[] cc - the block indices (must be large enough to contain the indices) */ static inline PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc) { PetscInt cnt = -1,nidx,j; const PetscInt *idx; PetscFunctionBegin; PetscCall(MatGetRow(Amat,row,&nidx,&idx,NULL)); if (nidx) { cnt = 0; cc[cnt] = idx[0]/bs; for (j=1; j= 0 */ PetscErrorCode PCGAMGCreateGraph(Mat Amat, Mat *a_Gmat, PetscBool symm) { PetscInt Istart,Iend,Ii,jj,kk,ncols,nloc,NN,MM,bs; MPI_Comm comm; Mat Gmat; PetscBool ismpiaij,isseqaij; Mat a, b, c; PetscFunctionBegin; PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm)); PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); PetscCall(MatGetSize(Amat, &MM, &NN)); PetscCall(MatGetBlockSize(Amat, &bs)); nloc = (Iend-Istart)/bs; PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij)); PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij)); PetscCheck(isseqaij || ismpiaij,comm,PETSC_ERR_USER,"Require (MPI)AIJ matrix type"); /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast implementation */ PetscCall(MatViewFromOptions(Amat, NULL, "-g_mat_view")); if (bs > 1 && (isseqaij || ((Mat_MPIAIJ*)Amat->data)->garray)) { PetscInt *d_nnz, *o_nnz; MatScalar *aa,val,AA[4096]; PetscInt *aj,*ai,AJ[4096],nc; if (isseqaij) { a = Amat; b = NULL; } else { Mat_MPIAIJ *d = (Mat_MPIAIJ*)Amat->data; a = d->A; b = d->B; } PetscCall(PetscInfo(Amat,"New bs>1 PCGAMGCreateGraph. nloc=%" PetscInt_FMT "\n",nloc)); PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz)); for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){ PetscInt *nnz = (c==a) ? d_nnz : o_nnz, nmax=0; const PetscInt *cols; for (PetscInt brow=0,jj,ok=1,j0; brow < nloc*bs; brow += bs) { // block rows PetscCall(MatGetRow(c,brow,&jj,&cols,NULL)); nnz[brow/bs] = jj/bs; if (jj%bs) ok = 0; if (cols) j0 = cols[0]; else j0 = -1; PetscCall(MatRestoreRow(c,brow,&jj,&cols,NULL)); if (nnz[brow/bs]>nmax) nmax = nnz[brow/bs]; for (PetscInt ii=1; ii < bs && nnz[brow/bs] ; ii++) { // check for non-dense blocks PetscCall(MatGetRow(c,brow+ii,&jj,&cols,NULL)); if (jj%bs) ok = 0; if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0; if (nnz[brow/bs] != jj/bs) ok = 0; PetscCall(MatRestoreRow(c,brow+ii,&jj,&cols,NULL)); } if (!ok) { PetscCall(PetscFree2(d_nnz,o_nnz)); goto old_bs; } } PetscCheck(nmax<4096,PETSC_COMM_SELF,PETSC_ERR_USER,"Buffer %" PetscInt_FMT " too small 4096.",nmax); } PetscCall(MatCreate(comm, &Gmat)); PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE)); PetscCall(MatSetBlockSizes(Gmat, 1, 1)); PetscCall(MatSetType(Gmat, MATAIJ)); PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz)); PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz)); PetscCall(PetscFree2(d_nnz,o_nnz)); // diag for (PetscInt brow=0,n,grow; brow < nloc*bs; brow += bs) { // block rows Mat_SeqAIJ *aseq = (Mat_SeqAIJ*)a->data; ai = aseq->i; n = ai[brow+1] - ai[brow]; aj = aseq->j + ai[brow]; for (int k=0; ka + ai[brow+ii] + k; for (int jj=0; jjdata; const PetscScalar *vals; const PetscInt *cols, *garray = aij->garray; PetscCheck(garray,PETSC_COMM_SELF,PETSC_ERR_USER,"No garray ?"); for (PetscInt brow=0,grow; brow < nloc*bs; brow += bs) { // block rows PetscCall(MatGetRow(b,brow,&ncols,&cols,NULL)); for (int k=0,cidx=0 ; k < ncols ; k += bs, cidx++) { AA[k/bs] = 0; AJ[cidx] = garray[cols[k]]/bs; } nc = ncols/bs; PetscCall(MatRestoreRow(b,brow,&ncols,&cols,NULL)); for (int ii=0; ii 1) { const PetscScalar *vals; const PetscInt *idx; PetscInt *d_nnz, *o_nnz,*w0,*w1,*w2; old_bs: /* Determine the preallocation needed for the scalar matrix derived from the vector matrix. */ PetscCall(PetscInfo(Amat,"OLD bs>1 PCGAMGCreateGraph\n")); PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz)); if (isseqaij) { PetscInt max_d_nnz; /* Determine exact preallocation count for (sequential) scalar matrix */ PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz)); max_d_nnz = PetscMin(nloc,bs*max_d_nnz); PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2)); for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) { PetscCall(MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL)); } PetscCall(PetscFree3(w0,w1,w2)); } else if (ismpiaij) { Mat Daij,Oaij; const PetscInt *garray; PetscInt max_d_nnz; PetscCall(MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray)); /* Determine exact preallocation count for diagonal block portion of scalar matrix */ PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz)); max_d_nnz = PetscMin(nloc,bs*max_d_nnz); PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2)); for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { PetscCall(MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL)); } PetscCall(PetscFree3(w0,w1,w2)); /* Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix */ for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { o_nnz[jj] = 0; for (kk=0; kk (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc; } } else SETERRQ(comm,PETSC_ERR_USER,"Require AIJ matrix type"); /* get scalar copy (norms) of matrix */ PetscCall(MatCreate(comm, &Gmat)); PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE)); PetscCall(MatSetBlockSizes(Gmat, 1, 1)); PetscCall(MatSetType(Gmat, MATAIJ)); PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz)); PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz)); PetscCall(PetscFree2(d_nnz,o_nnz)); for (Ii = Istart; Ii < Iend; Ii++) { PetscInt dest_row = Ii/bs; PetscCall(MatGetRow(Amat,Ii,&ncols,&idx,&vals)); for (jj=0; jjdata; a = d->A; b = d->B; } /* abs */ for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){ MatInfo info; PetscScalar *avals; PetscCall(MatGetInfo(c,MAT_LOCAL,&info)); PetscCall(MatSeqAIJGetArray(c,&avals)); for (int jj = 0; jjstructurally_symmetric ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); PetscCall(MatDestroy(&matTrans)); } PetscCall(MatSetOption(Gmat,MAT_SYMMETRIC,PETSC_TRUE)); /* PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); -- a graph has to be symmetric and +. Normal Mat options are not relevant ? */ *a_Gmat = Gmat; PetscFunctionReturn(0); } /* -------------------------------------------------------------------------- */ /*@C PCGAMGFilterGraph - filter values with small absolute values (and make graph symmetric if requested). With vfilter < 0 just return. The user needs to check if the matrix has not changed if they allow for vfilter < 0 Collective on Mat Input Parameters: + a_Gmat - the graph . vfilter - threshold parameter [0,1) Output Parameter: . a_Gmat - output filtered scalar graph Level: developer Notes: This is called before graph coarsers are called. This could go into Mat, move 'symm' to GAMG .seealso: `PCGAMGSetThreshold()` @*/ PetscErrorCode PCGAMGFilterGraph(Mat *a_Gmat,PetscReal vfilter) { PetscInt Istart,Iend,ncols,nnz0,nnz1, NN, MM, nloc; Mat Gmat = *a_Gmat, tGmat; MPI_Comm comm; const PetscScalar *vals; const PetscInt *idx; PetscInt *d_nnz, *o_nnz, kk, *garray = NULL, AJ[4096]; MatScalar AA[4096]; // this is checked in graph Vec diag; PetscBool ismpiaij,isseqaij; Mat a, b, c; PetscFunctionBegin; PetscCall(PetscObjectGetComm((PetscObject)Gmat,&comm)); PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATSEQAIJ,&isseqaij)); PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATMPIAIJ,&ismpiaij)); /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold? Also, if the matrix is symmetric, can we skip this operation? It can be very expensive on large matrices. */ if (vfilter < 0.0) { /* nothing else to do, just return */ PetscFunctionReturn(0); } /* scale c for all values between -1 and 1 */ PetscCall(MatCreateVecs(Gmat, &diag, NULL)); PetscCall(MatGetDiagonal(Gmat, diag)); PetscCall(VecReciprocal(diag)); PetscCall(VecSqrtAbs(diag)); PetscCall(MatDiagonalScale(Gmat, diag, diag)); // global sizes PetscCall(MatGetSize(Gmat, &MM, &NN)); PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend)); nloc = Iend - Istart; PetscCall(PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz)); if (isseqaij) { a = Gmat; b = NULL; } else { Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data; a = d->A; b = d->B; garray = d->garray; } /* Determine upper bound on non-zeros needed in new filtered matrix */ for (PetscInt row=0; row < nloc; row++) { PetscCall(MatGetRow(a,row,&ncols,NULL,NULL)); d_nnz[row] = ncols; PetscCall(MatRestoreRow(a,row,&ncols,NULL,NULL)); } if (b) { for (PetscInt row=0; row < nloc; row++) { PetscCall(MatGetRow(b,row,&ncols,NULL,NULL)); o_nnz[row] = ncols; PetscCall(MatRestoreRow(b,row,&ncols,NULL,NULL)); } } PetscCall(MatCreate(comm, &tGmat)); PetscCall(MatSetSizes(tGmat,nloc,nloc,MM,MM)); PetscCall(MatSetBlockSizes(tGmat, 1, 1)); PetscCall(MatSetType(tGmat, MATAIJ)); PetscCall(MatSeqAIJSetPreallocation(tGmat,0,d_nnz)); PetscCall(MatMPIAIJSetPreallocation(tGmat,0,d_nnz,0,o_nnz)); PetscCall(MatSetOption(tGmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); PetscCall(PetscFree2(d_nnz,o_nnz)); nnz0 = nnz1 = 0; for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){ for (PetscInt row=0, grow=Istart, ncol_row, jj ; row < nloc; row++,grow++) { PetscCall(MatGetRow(c,row,&ncols,&idx,&vals)); PetscCheck(ncols<4096,PETSC_COMM_SELF,PETSC_ERR_USER,"Buffer, ncols = %" PetscInt_FMT ", too small 4096.",ncols); for (ncol_row=jj=0; jj vfilter) { nnz1++; PetscInt cid = idx[jj] + Istart; //diag if (c!=a) cid = garray[idx[jj]]; AA[ncol_row] = vals[jj]; AJ[ncol_row] = cid; ncol_row++; } } PetscCall(MatRestoreRow(c,row,&ncols,&idx,&vals)); PetscCall(MatSetValues(tGmat,1,&grow,ncol_row,AJ,AA,INSERT_VALUES)); } } PetscCall(MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY)); PetscCall(MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY)); PetscCall(MatPropagateSymmetryOptions(Gmat,tGmat)); /* Normal Mat options are not relevant ? */ PetscCall(PetscInfo(tGmat,"\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ")\n", (!nnz0) ? 1. : 100.*(double)nnz1/(double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0/(double)nloc,MM)); PetscCall(MatDestroy(&Gmat)); PetscCall(VecDestroy(&diag)); *a_Gmat = tGmat; PetscFunctionReturn(0); } /* -------------------------------------------------------------------------- */ /* PCGAMGGetDataWithGhosts - hacks into Mat MPIAIJ so this must have size > 1 Input Parameter: . Gmat - MPIAIJ matrix for scattters . data_sz - number of data terms per node (# cols in output) . data_in[nloc*data_sz] - column oriented data Output Parameter: . a_stride - numbrt of rows of output . a_data_out[stride*data_sz] - output data with ghosts */ PetscErrorCode PCGAMGGetDataWithGhosts(Mat Gmat,PetscInt data_sz,PetscReal data_in[],PetscInt *a_stride,PetscReal **a_data_out) { Vec tmp_crds; Mat_MPIAIJ *mpimat = (Mat_MPIAIJ*)Gmat->data; PetscInt nnodes,num_ghosts,dir,kk,jj,my0,Iend,nloc; PetscScalar *data_arr; PetscReal *datas; PetscBool isMPIAIJ; PetscFunctionBegin; PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATMPIAIJ, &isMPIAIJ)); PetscCall(MatGetOwnershipRange(Gmat, &my0, &Iend)); nloc = Iend - my0; PetscCall(VecGetLocalSize(mpimat->lvec, &num_ghosts)); nnodes = num_ghosts + nloc; *a_stride = nnodes; PetscCall(MatCreateVecs(Gmat, &tmp_crds, NULL)); PetscCall(PetscMalloc1(data_sz*nnodes, &datas)); for (dir=0; dirMvctx,tmp_crds,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD)); PetscCall(VecScatterEnd(mpimat->Mvctx,tmp_crds,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD)); PetscCall(VecGetArray(mpimat->lvec, &data_arr)); for (kk=nloc,jj=0;jjlvec, &data_arr)); } PetscCall(VecDestroy(&tmp_crds)); *a_data_out = datas; PetscFunctionReturn(0); } PetscErrorCode PCGAMGHashTableCreate(PetscInt a_size, PCGAMGHashTable *a_tab) { PetscInt kk; PetscFunctionBegin; a_tab->size = a_size; PetscCall(PetscMalloc2(a_size, &a_tab->table,a_size, &a_tab->data)); for (kk=0; kktable[kk] = -1; PetscFunctionReturn(0); } PetscErrorCode PCGAMGHashTableDestroy(PCGAMGHashTable *a_tab) { PetscFunctionBegin; PetscCall(PetscFree2(a_tab->table,a_tab->data)); PetscFunctionReturn(0); } PetscErrorCode PCGAMGHashTableAdd(PCGAMGHashTable *a_tab, PetscInt a_key, PetscInt a_data) { PetscInt kk,idx; PetscFunctionBegin; PetscCheck(a_key>=0,PETSC_COMM_SELF,PETSC_ERR_USER,"Negative key %" PetscInt_FMT ".",a_key); for (kk = 0, idx = GAMG_HASH(a_key); kk < a_tab->size; kk++, idx = (idx==(a_tab->size-1)) ? 0 : idx + 1) { if (a_tab->table[idx] == a_key) { /* exists */ a_tab->data[idx] = a_data; break; } else if (a_tab->table[idx] == -1) { /* add */ a_tab->table[idx] = a_key; a_tab->data[idx] = a_data; break; } } if (kk==a_tab->size) { /* this is not to efficient, waiting until completely full */ PetscInt oldsize = a_tab->size, new_size = 2*a_tab->size + 5, *oldtable = a_tab->table, *olddata = a_tab->data; a_tab->size = new_size; PetscCall(PetscMalloc2(a_tab->size, &a_tab->table,a_tab->size, &a_tab->data)); for (kk=0;kksize;kk++) a_tab->table[kk] = -1; for (kk=0;kk