1 /* 2 Factorization code for BAIJ format. 3 */ 4 #include <../src/mat/impls/baij/seq/baij.h> 5 #include <petsc/private/kernels/blockinvert.h> 6 7 /* 8 This is used to set the numeric factorization for both LU and ILU symbolic factorization 9 */ 10 PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact, PetscBool natural) 11 { 12 PetscFunctionBegin; 13 if (natural) { 14 switch (fact->rmap->bs) { 15 case 1: 16 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 17 break; 18 case 2: 19 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 20 break; 21 case 3: 22 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 23 break; 24 case 4: 25 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 26 break; 27 case 5: 28 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 29 break; 30 case 6: 31 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 32 break; 33 case 7: 34 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 35 break; 36 case 9: 37 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 38 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering; 39 #else 40 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 41 #endif 42 break; 43 case 15: 44 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering; 45 break; 46 default: 47 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 48 break; 49 } 50 } else { 51 switch (fact->rmap->bs) { 52 case 1: 53 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 54 break; 55 case 2: 56 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 57 break; 58 case 3: 59 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 60 break; 61 case 4: 62 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 63 break; 64 case 5: 65 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 66 break; 67 case 6: 68 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 69 break; 70 case 7: 71 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 72 break; 73 default: 74 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 75 break; 76 } 77 } 78 PetscFunctionReturn(PETSC_SUCCESS); 79 } 80 81 PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA, PetscBool natural) 82 { 83 PetscFunctionBegin; 84 if (natural) { 85 switch (inA->rmap->bs) { 86 case 1: 87 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_1_inplace; 88 break; 89 case 2: 90 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace; 91 break; 92 case 3: 93 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace; 94 break; 95 case 4: 96 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace; 97 break; 98 case 5: 99 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace; 100 break; 101 case 6: 102 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace; 103 break; 104 case 7: 105 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace; 106 break; 107 default: 108 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_N_inplace; 109 break; 110 } 111 } else { 112 switch (inA->rmap->bs) { 113 case 1: 114 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_1_inplace; 115 break; 116 case 2: 117 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_2_inplace; 118 break; 119 case 3: 120 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_3_inplace; 121 break; 122 case 4: 123 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_4_inplace; 124 break; 125 case 5: 126 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_5_inplace; 127 break; 128 case 6: 129 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_6_inplace; 130 break; 131 case 7: 132 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_7_inplace; 133 break; 134 default: 135 inA->ops->lufactornumeric = MatILUFactorNumeric_SeqBAIJ_N_inplace; 136 break; 137 } 138 } 139 PetscFunctionReturn(PETSC_SUCCESS); 140 } 141 142 /* 143 The symbolic factorization code is identical to that for AIJ format, 144 except for very small changes since this is now a SeqBAIJ datastructure. 145 NOT good code reuse. 146 */ 147 #include <petscbt.h> 148 #include <../src/mat/utils/freespace.h> 149 150 PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 151 { 152 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b; 153 PetscInt n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2; 154 PetscBool row_identity, col_identity, both_identity; 155 IS isicol; 156 const PetscInt *r, *ic; 157 PetscInt i, *ai = a->i, *aj = a->j; 158 PetscInt *bi, *bj, *ajtmp; 159 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 160 PetscReal f; 161 PetscInt nlnk, *lnk, k, **bi_ptr; 162 PetscFreeSpaceList free_space = NULL, current_space = NULL; 163 PetscBT lnkbt; 164 PetscBool diagDense; 165 166 PetscFunctionBegin; 167 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 168 PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, NULL, &diagDense)); 169 PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry"); 170 171 if (bs > 1) { /* check shifttype */ 172 PetscCheck(info->shifttype != (PetscReal)MAT_SHIFT_NONZERO && info->shifttype != (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix"); 173 } 174 175 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 176 PetscCall(ISGetIndices(isrow, &r)); 177 PetscCall(ISGetIndices(isicol, &ic)); 178 179 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 180 PetscCall(PetscMalloc1(n + 1, &bi)); 181 PetscCall(PetscMalloc1(n + 1, &bdiag)); 182 bi[0] = bdiag[0] = 0; 183 184 /* linked list for storing column indices of the active row */ 185 nlnk = n + 1; 186 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 187 188 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 189 190 /* initial FreeSpace size is f*(ai[n]+1) */ 191 f = info->fill; 192 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 193 194 current_space = free_space; 195 196 for (i = 0; i < n; i++) { 197 /* copy previous fill into linked list */ 198 nzi = 0; 199 nnz = ai[r[i] + 1] - ai[r[i]]; 200 ajtmp = aj + ai[r[i]]; 201 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 202 nzi += nlnk; 203 204 /* add pivot rows into linked list */ 205 row = lnk[n]; 206 while (row < i) { 207 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 208 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 209 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 210 nzi += nlnk; 211 row = lnk[row]; 212 } 213 bi[i + 1] = bi[i] + nzi; 214 im[i] = nzi; 215 216 /* mark bdiag */ 217 nzbd = 0; 218 nnz = nzi; 219 k = lnk[n]; 220 while (nnz-- && k < i) { 221 nzbd++; 222 k = lnk[k]; 223 } 224 bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 225 226 /* if free space is not available, make more free space */ 227 if (current_space->local_remaining < nzi) { 228 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - i, nzi)); /* estimated and max additional space needed */ 229 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 230 reallocs++; 231 } 232 233 /* copy data into free space, then initialize lnk */ 234 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 235 236 bi_ptr[i] = current_space->array; 237 current_space->array += nzi; 238 current_space->local_used += nzi; 239 current_space->local_remaining -= nzi; 240 } 241 242 PetscCall(ISRestoreIndices(isrow, &r)); 243 PetscCall(ISRestoreIndices(isicol, &ic)); 244 245 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 246 PetscCall(PetscMalloc1(bi[n], &bj)); 247 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 248 PetscCall(PetscLLDestroy(lnk, lnkbt)); 249 PetscCall(PetscFree2(bi_ptr, im)); 250 251 /* put together the new matrix */ 252 PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL)); 253 b = (Mat_SeqBAIJ *)B->data; 254 255 b->free_ij = PETSC_TRUE; 256 PetscCall(PetscShmgetAllocateArray((bdiag[0] + 1) * bs2, sizeof(PetscScalar), (void **)&b->a)); 257 b->free_a = PETSC_TRUE; 258 b->j = bj; 259 b->i = bi; 260 b->diag = bdiag; 261 b->ilen = NULL; 262 b->imax = NULL; 263 b->row = isrow; 264 b->col = iscol; 265 b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 266 267 PetscCall(PetscObjectReference((PetscObject)isrow)); 268 PetscCall(PetscObjectReference((PetscObject)iscol)); 269 b->icol = isicol; 270 PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work)); 271 272 b->maxnz = b->nz = bdiag[0] + 1; 273 274 B->factortype = MAT_FACTOR_LU; 275 B->info.factor_mallocs = reallocs; 276 B->info.fill_ratio_given = f; 277 278 if (ai[n] != 0) { 279 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 280 } else { 281 B->info.fill_ratio_needed = 0.0; 282 } 283 #if defined(PETSC_USE_INFO) 284 if (ai[n] != 0) { 285 PetscReal af = B->info.fill_ratio_needed; 286 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 287 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 288 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 289 PetscCall(PetscInfo(A, "for best performance.\n")); 290 } else { 291 PetscCall(PetscInfo(A, "Empty matrix\n")); 292 } 293 #endif 294 295 PetscCall(ISIdentity(isrow, &row_identity)); 296 PetscCall(ISIdentity(iscol, &col_identity)); 297 298 both_identity = (PetscBool)(row_identity && col_identity); 299 300 PetscCall(MatSeqBAIJSetNumericFactorization(B, both_identity)); 301 PetscFunctionReturn(PETSC_SUCCESS); 302 } 303