1 2 /* 3 Factorization code for BAIJ format. 4 */ 5 #include <../src/mat/impls/baij/seq/baij.h> 6 #include <petsc/private/kernels/blockinvert.h> 7 8 /* 9 This is used to set the numeric factorization for both LU and ILU symbolic factorization 10 */ 11 PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact, PetscBool natural) { 12 PetscFunctionBegin; 13 if (natural) { 14 switch (fact->rmap->bs) { 15 case 1: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; break; 16 case 2: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; break; 17 case 3: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; break; 18 case 4: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; break; 19 case 5: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; break; 20 case 6: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; break; 21 case 7: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; break; 22 case 9: 23 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 24 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering; 25 #else 26 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 27 #endif 28 break; 29 case 15: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering; break; 30 default: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; break; 31 } 32 } else { 33 switch (fact->rmap->bs) { 34 case 1: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; break; 35 case 2: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; break; 36 case 3: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; break; 37 case 4: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; break; 38 case 5: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; break; 39 case 6: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; break; 40 case 7: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; break; 41 default: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; break; 42 } 43 } 44 PetscFunctionReturn(0); 45 } 46 47 PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA, PetscBool natural) { 48 PetscFunctionBegin; 49 if (natural) { 50 switch (inA->rmap->bs) { 51 case 1: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace; break; 52 case 2: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace; break; 53 case 3: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace; break; 54 case 4: 55 #if defined(PETSC_USE_REAL_MAT_SINGLE) 56 { 57 PetscBool sse_enabled_local; 58 PetscCall(PetscSSEIsEnabled(inA->comm, &sse_enabled_local, NULL)); 59 if (sse_enabled_local) { 60 #if defined(PETSC_HAVE_SSE) 61 int i, *AJ = a->j, nz = a->nz, n = a->mbs; 62 if (n == (unsigned short)n) { 63 unsigned short *aj = (unsigned short *)AJ; 64 for (i = 0; i < nz; i++) aj[i] = (unsigned short)AJ[i]; 65 66 inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj; 67 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj; 68 69 PetscCall(PetscInfo(inA, "Using special SSE, in-place natural ordering, ushort j index factor BS=4\n")); 70 } else { 71 /* Scale the column indices for easier indexing in MatSolve. */ 72 /* for (i=0;i<nz;i++) { */ 73 /* AJ[i] = AJ[i]*4; */ 74 /* } */ 75 inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE; 76 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE; 77 78 PetscCall(PetscInfo(inA, "Using special SSE, in-place natural ordering, int j index factor BS=4\n")); 79 } 80 #else 81 /* This should never be reached. If so, problem in PetscSSEIsEnabled. */ 82 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "SSE Hardware unavailable"); 83 #endif 84 } else { 85 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace; 86 } 87 } 88 #else 89 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace; 90 #endif 91 break; 92 case 5: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace; break; 93 case 6: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace; break; 94 case 7: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace; break; 95 default: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace; break; 96 } 97 } else { 98 switch (inA->rmap->bs) { 99 case 1: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace; break; 100 case 2: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace; break; 101 case 3: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace; break; 102 case 4: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace; break; 103 case 5: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace; break; 104 case 6: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace; break; 105 case 7: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace; break; 106 default: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace; break; 107 } 108 } 109 PetscFunctionReturn(0); 110 } 111 112 /* 113 The symbolic factorization code is identical to that for AIJ format, 114 except for very small changes since this is now a SeqBAIJ datastructure. 115 NOT good code reuse. 116 */ 117 #include <petscbt.h> 118 #include <../src/mat/utils/freespace.h> 119 120 PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) { 121 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b; 122 PetscInt n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2; 123 PetscBool row_identity, col_identity, both_identity; 124 IS isicol; 125 const PetscInt *r, *ic; 126 PetscInt i, *ai = a->i, *aj = a->j; 127 PetscInt *bi, *bj, *ajtmp; 128 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 129 PetscReal f; 130 PetscInt nlnk, *lnk, k, **bi_ptr; 131 PetscFreeSpaceList free_space = NULL, current_space = NULL; 132 PetscBT lnkbt; 133 PetscBool missing; 134 135 PetscFunctionBegin; 136 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 137 PetscCall(MatMissingDiagonal(A, &missing, &i)); 138 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 139 140 if (bs > 1) { /* check shifttype */ 141 PetscCheck(info->shifttype != MAT_SHIFT_NONZERO && info->shifttype != MAT_SHIFT_POSITIVE_DEFINITE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix"); 142 } 143 144 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 145 PetscCall(ISGetIndices(isrow, &r)); 146 PetscCall(ISGetIndices(isicol, &ic)); 147 148 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 149 PetscCall(PetscMalloc1(n + 1, &bi)); 150 PetscCall(PetscMalloc1(n + 1, &bdiag)); 151 bi[0] = bdiag[0] = 0; 152 153 /* linked list for storing column indices of the active row */ 154 nlnk = n + 1; 155 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 156 157 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 158 159 /* initial FreeSpace size is f*(ai[n]+1) */ 160 f = info->fill; 161 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 162 163 current_space = free_space; 164 165 for (i = 0; i < n; i++) { 166 /* copy previous fill into linked list */ 167 nzi = 0; 168 nnz = ai[r[i] + 1] - ai[r[i]]; 169 ajtmp = aj + ai[r[i]]; 170 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 171 nzi += nlnk; 172 173 /* add pivot rows into linked list */ 174 row = lnk[n]; 175 while (row < i) { 176 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 177 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 178 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 179 nzi += nlnk; 180 row = lnk[row]; 181 } 182 bi[i + 1] = bi[i] + nzi; 183 im[i] = nzi; 184 185 /* mark bdiag */ 186 nzbd = 0; 187 nnz = nzi; 188 k = lnk[n]; 189 while (nnz-- && k < i) { 190 nzbd++; 191 k = lnk[k]; 192 } 193 bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 194 195 /* if free space is not available, make more free space */ 196 if (current_space->local_remaining < nzi) { 197 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - i, nzi)); /* estimated and max additional space needed */ 198 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 199 reallocs++; 200 } 201 202 /* copy data into free space, then initialize lnk */ 203 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 204 205 bi_ptr[i] = current_space->array; 206 current_space->array += nzi; 207 current_space->local_used += nzi; 208 current_space->local_remaining -= nzi; 209 } 210 211 PetscCall(ISRestoreIndices(isrow, &r)); 212 PetscCall(ISRestoreIndices(isicol, &ic)); 213 214 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 215 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 216 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 217 PetscCall(PetscLLDestroy(lnk, lnkbt)); 218 PetscCall(PetscFree2(bi_ptr, im)); 219 220 /* put together the new matrix */ 221 PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL)); 222 PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)isicol)); 223 b = (Mat_SeqBAIJ *)(B)->data; 224 225 b->free_a = PETSC_TRUE; 226 b->free_ij = PETSC_TRUE; 227 b->singlemalloc = PETSC_FALSE; 228 229 PetscCall(PetscMalloc1((bdiag[0] + 1) * bs2, &b->a)); 230 b->j = bj; 231 b->i = bi; 232 b->diag = bdiag; 233 b->free_diag = PETSC_TRUE; 234 b->ilen = NULL; 235 b->imax = NULL; 236 b->row = isrow; 237 b->col = iscol; 238 b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 239 240 PetscCall(PetscObjectReference((PetscObject)isrow)); 241 PetscCall(PetscObjectReference((PetscObject)iscol)); 242 b->icol = isicol; 243 PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work)); 244 PetscCall(PetscLogObjectMemory((PetscObject)B, (bdiag[0] + 1) * (sizeof(PetscInt) + sizeof(PetscScalar) * bs2))); 245 246 b->maxnz = b->nz = bdiag[0] + 1; 247 248 B->factortype = MAT_FACTOR_LU; 249 B->info.factor_mallocs = reallocs; 250 B->info.fill_ratio_given = f; 251 252 if (ai[n] != 0) { 253 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 254 } else { 255 B->info.fill_ratio_needed = 0.0; 256 } 257 #if defined(PETSC_USE_INFO) 258 if (ai[n] != 0) { 259 PetscReal af = B->info.fill_ratio_needed; 260 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 261 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 262 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 263 PetscCall(PetscInfo(A, "for best performance.\n")); 264 } else { 265 PetscCall(PetscInfo(A, "Empty matrix\n")); 266 } 267 #endif 268 269 PetscCall(ISIdentity(isrow, &row_identity)); 270 PetscCall(ISIdentity(iscol, &col_identity)); 271 272 both_identity = (PetscBool)(row_identity && col_identity); 273 274 PetscCall(MatSeqBAIJSetNumericFactorization(B, both_identity)); 275 PetscFunctionReturn(0); 276 } 277 278 PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) { 279 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b; 280 PetscInt n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2; 281 PetscBool row_identity, col_identity, both_identity; 282 IS isicol; 283 const PetscInt *r, *ic; 284 PetscInt i, *ai = a->i, *aj = a->j; 285 PetscInt *bi, *bj, *ajtmp; 286 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 287 PetscReal f; 288 PetscInt nlnk, *lnk, k, **bi_ptr; 289 PetscFreeSpaceList free_space = NULL, current_space = NULL; 290 PetscBT lnkbt; 291 PetscBool missing; 292 293 PetscFunctionBegin; 294 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 295 PetscCall(MatMissingDiagonal(A, &missing, &i)); 296 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 297 298 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 299 PetscCall(ISGetIndices(isrow, &r)); 300 PetscCall(ISGetIndices(isicol, &ic)); 301 302 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 303 PetscCall(PetscMalloc1(n + 1, &bi)); 304 PetscCall(PetscMalloc1(n + 1, &bdiag)); 305 306 bi[0] = bdiag[0] = 0; 307 308 /* linked list for storing column indices of the active row */ 309 nlnk = n + 1; 310 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 311 312 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 313 314 /* initial FreeSpace size is f*(ai[n]+1) */ 315 f = info->fill; 316 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 317 current_space = free_space; 318 319 for (i = 0; i < n; i++) { 320 /* copy previous fill into linked list */ 321 nzi = 0; 322 nnz = ai[r[i] + 1] - ai[r[i]]; 323 ajtmp = aj + ai[r[i]]; 324 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 325 nzi += nlnk; 326 327 /* add pivot rows into linked list */ 328 row = lnk[n]; 329 while (row < i) { 330 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 331 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 332 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 333 nzi += nlnk; 334 row = lnk[row]; 335 } 336 bi[i + 1] = bi[i] + nzi; 337 im[i] = nzi; 338 339 /* mark bdiag */ 340 nzbd = 0; 341 nnz = nzi; 342 k = lnk[n]; 343 while (nnz-- && k < i) { 344 nzbd++; 345 k = lnk[k]; 346 } 347 bdiag[i] = bi[i] + nzbd; 348 349 /* if free space is not available, make more free space */ 350 if (current_space->local_remaining < nzi) { 351 nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */ 352 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 353 reallocs++; 354 } 355 356 /* copy data into free space, then initialize lnk */ 357 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 358 359 bi_ptr[i] = current_space->array; 360 current_space->array += nzi; 361 current_space->local_used += nzi; 362 current_space->local_remaining -= nzi; 363 } 364 #if defined(PETSC_USE_INFO) 365 if (ai[n] != 0) { 366 PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 367 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 368 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 369 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 370 PetscCall(PetscInfo(A, "for best performance.\n")); 371 } else { 372 PetscCall(PetscInfo(A, "Empty matrix\n")); 373 } 374 #endif 375 376 PetscCall(ISRestoreIndices(isrow, &r)); 377 PetscCall(ISRestoreIndices(isicol, &ic)); 378 379 /* destroy list of free space and other temporary array(s) */ 380 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 381 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 382 PetscCall(PetscLLDestroy(lnk, lnkbt)); 383 PetscCall(PetscFree2(bi_ptr, im)); 384 385 /* put together the new matrix */ 386 PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL)); 387 PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)isicol)); 388 b = (Mat_SeqBAIJ *)(B)->data; 389 390 b->free_a = PETSC_TRUE; 391 b->free_ij = PETSC_TRUE; 392 b->singlemalloc = PETSC_FALSE; 393 394 PetscCall(PetscMalloc1((bi[n] + 1) * bs2, &b->a)); 395 b->j = bj; 396 b->i = bi; 397 b->diag = bdiag; 398 b->free_diag = PETSC_TRUE; 399 b->ilen = NULL; 400 b->imax = NULL; 401 b->row = isrow; 402 b->col = iscol; 403 b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 404 405 PetscCall(PetscObjectReference((PetscObject)isrow)); 406 PetscCall(PetscObjectReference((PetscObject)iscol)); 407 b->icol = isicol; 408 409 PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work)); 410 PetscCall(PetscLogObjectMemory((PetscObject)B, (bi[n] - n) * (sizeof(PetscInt) + sizeof(PetscScalar) * bs2))); 411 412 b->maxnz = b->nz = bi[n]; 413 414 (B)->factortype = MAT_FACTOR_LU; 415 (B)->info.factor_mallocs = reallocs; 416 (B)->info.fill_ratio_given = f; 417 418 if (ai[n] != 0) { 419 (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 420 } else { 421 (B)->info.fill_ratio_needed = 0.0; 422 } 423 424 PetscCall(ISIdentity(isrow, &row_identity)); 425 PetscCall(ISIdentity(iscol, &col_identity)); 426 427 both_identity = (PetscBool)(row_identity && col_identity); 428 429 PetscCall(MatSeqBAIJSetNumericFactorization_inplace(B, both_identity)); 430 PetscFunctionReturn(0); 431 } 432