1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/mat/impls/sbaij/seq/sbaij.h> 3 #include <petscbt.h> 4 #include <../src/mat/utils/freespace.h> 5 6 /* 7 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 8 9 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 10 */ 11 #if 0 12 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol) 13 { 14 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->data; 15 PetscInt i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order; 16 const PetscInt *ai = a->i, *aj = a->j; 17 const PetscScalar *aa = a->a; 18 PetscBool *done; 19 PetscReal best, past = 0, future; 20 21 PetscFunctionBegin; 22 /* pick initial row */ 23 best = -1; 24 for (i = 0; i < n; i++) { 25 future = 0.0; 26 for (j = ai[i]; j < ai[i + 1]; j++) { 27 if (aj[j] != i) future += PetscAbsScalar(aa[j]); 28 else past = PetscAbsScalar(aa[j]); 29 } 30 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 31 if (past / future > best) { 32 best = past / future; 33 current = i; 34 } 35 } 36 37 PetscCall(PetscMalloc1(n, &done)); 38 PetscCall(PetscArrayzero(done, n)); 39 PetscCall(PetscMalloc1(n, &order)); 40 order[0] = current; 41 for (i = 0; i < n - 1; i++) { 42 done[current] = PETSC_TRUE; 43 best = -1; 44 /* loop over all neighbors of current pivot */ 45 for (j = ai[current]; j < ai[current + 1]; j++) { 46 jj = aj[j]; 47 if (done[jj]) continue; 48 /* loop over columns of potential next row computing weights for below and above diagonal */ 49 past = future = 0.0; 50 for (k = ai[jj]; k < ai[jj + 1]; k++) { 51 kk = aj[k]; 52 if (done[kk]) past += PetscAbsScalar(aa[k]); 53 else if (kk != jj) future += PetscAbsScalar(aa[k]); 54 } 55 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 56 if (past / future > best) { 57 best = past / future; 58 newcurrent = jj; 59 } 60 } 61 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 62 best = -1; 63 for (k = 0; k < n; k++) { 64 if (done[k]) continue; 65 future = 0.0; 66 past = 0.0; 67 for (j = ai[k]; j < ai[k + 1]; j++) { 68 kk = aj[j]; 69 if (done[kk]) past += PetscAbsScalar(aa[j]); 70 else if (kk != k) future += PetscAbsScalar(aa[j]); 71 } 72 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 73 if (past / future > best) { 74 best = past / future; 75 newcurrent = k; 76 } 77 } 78 } 79 PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current"); 80 current = newcurrent; 81 order[i + 1] = current; 82 } 83 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow)); 84 *icol = *irow; 85 PetscCall(PetscObjectReference((PetscObject)*irow)); 86 PetscCall(PetscFree(done)); 87 PetscCall(PetscFree(order)); 88 PetscFunctionReturn(PETSC_SUCCESS); 89 } 90 #endif 91 92 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type) 93 { 94 PetscFunctionBegin; 95 *type = MATSOLVERPETSC; 96 PetscFunctionReturn(PETSC_SUCCESS); 97 } 98 99 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B) 100 { 101 PetscInt n = A->rmap->n; 102 103 PetscFunctionBegin; 104 #if defined(PETSC_USE_COMPLEX) 105 if ((ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 106 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n")); 107 *B = NULL; 108 PetscFunctionReturn(PETSC_SUCCESS); 109 } 110 #endif 111 112 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 113 PetscCall(MatSetSizes(*B, n, n, n, n)); 114 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 115 PetscCall(MatSetType(*B, MATSEQAIJ)); 116 117 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 118 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 119 120 PetscCall(MatSetBlockSizesFromMats(*B, A, A)); 121 PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU])); 122 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU])); 123 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT])); 124 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 125 PetscCall(MatSetType(*B, MATSEQSBAIJ)); 126 PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL)); 127 128 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 129 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 130 PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY])); 131 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC])); 132 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported"); 133 (*B)->factortype = ftype; 134 135 PetscCall(PetscFree((*B)->solvertype)); 136 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype)); 137 (*B)->canuseordering = PETSC_TRUE; 138 PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc)); 139 PetscFunctionReturn(PETSC_SUCCESS); 140 } 141 142 #if 0 143 // currently unused 144 static PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 145 { 146 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 147 IS isicol; 148 const PetscInt *r, *ic; 149 PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j; 150 PetscInt *bi, *bj, *ajtmp; 151 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 152 PetscReal f; 153 PetscInt nlnk, *lnk, k, **bi_ptr; 154 PetscFreeSpaceList free_space = NULL, current_space = NULL; 155 PetscBT lnkbt; 156 PetscBool missing; 157 158 PetscFunctionBegin; 159 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 160 PetscCall(MatMissingDiagonal(A, &missing, &i)); 161 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 162 163 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 164 PetscCall(ISGetIndices(isrow, &r)); 165 PetscCall(ISGetIndices(isicol, &ic)); 166 167 /* get new row pointers */ 168 PetscCall(PetscShmgetAllocateArray(n + 1,sizeof(PetscInt),(void **)&bi)); 169 bi[0] = 0; 170 171 /* bdiag is location of diagonal in factor */ 172 PetscCall(PetscMalloc1(n + 1, &bdiag)); 173 bdiag[0] = 0; 174 175 /* linked list for storing column indices of the active row */ 176 nlnk = n + 1; 177 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 178 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 179 180 /* initial FreeSpace size is f*(ai[n]+1) */ 181 f = info->fill; 182 if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 183 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 184 current_space = free_space; 185 186 for (i = 0; i < n; i++) { 187 /* copy previous fill into linked list */ 188 nzi = 0; 189 nnz = ai[r[i] + 1] - ai[r[i]]; 190 ajtmp = aj + ai[r[i]]; 191 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 192 nzi += nlnk; 193 194 /* add pivot rows into linked list */ 195 row = lnk[n]; 196 while (row < i) { 197 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 198 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 199 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 200 nzi += nlnk; 201 row = lnk[row]; 202 } 203 bi[i + 1] = bi[i] + nzi; 204 im[i] = nzi; 205 206 /* mark bdiag */ 207 nzbd = 0; 208 nnz = nzi; 209 k = lnk[n]; 210 while (nnz-- && k < i) { 211 nzbd++; 212 k = lnk[k]; 213 } 214 bdiag[i] = bi[i] + nzbd; 215 216 /* if free space is not available, make more free space */ 217 if (current_space->local_remaining < nzi) { 218 nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */ 219 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 220 reallocs++; 221 } 222 223 /* copy data into free space, then initialize lnk */ 224 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 225 226 bi_ptr[i] = current_space->array; 227 current_space->array += nzi; 228 current_space->local_used += nzi; 229 current_space->local_remaining -= nzi; 230 } 231 #if defined(PETSC_USE_INFO) 232 if (ai[n] != 0) { 233 PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 234 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 235 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 236 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 237 PetscCall(PetscInfo(A, "for best performance.\n")); 238 } else { 239 PetscCall(PetscInfo(A, "Empty matrix\n")); 240 } 241 #endif 242 243 PetscCall(ISRestoreIndices(isrow, &r)); 244 PetscCall(ISRestoreIndices(isicol, &ic)); 245 246 /* destroy list of free space and other temporary array(s) */ 247 PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscInt),(void **)&bj)); 248 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 249 PetscCall(PetscLLDestroy(lnk, lnkbt)); 250 PetscCall(PetscFree2(bi_ptr, im)); 251 252 /* put together the new matrix */ 253 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 254 b = (Mat_SeqAIJ *)B->data; 255 b->free_ij = PETSC_TRUE; 256 PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscScalar),(void **)&b->a)); 257 b->free_a = PETSC_TRUE; 258 b->j = bj; 259 b->i = bi; 260 b->diag = bdiag; 261 b->ilen = NULL; 262 b->imax = NULL; 263 b->row = isrow; 264 b->col = iscol; 265 PetscCall(PetscObjectReference((PetscObject)isrow)); 266 PetscCall(PetscObjectReference((PetscObject)iscol)); 267 b->icol = isicol; 268 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 269 270 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 271 b->maxnz = b->nz = bi[n]; 272 273 B->factortype = MAT_FACTOR_LU; 274 B->info.factor_mallocs = reallocs; 275 B->info.fill_ratio_given = f; 276 277 if (ai[n]) { 278 B->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 279 } else { 280 B->info.fill_ratio_needed = 0.0; 281 } 282 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 283 if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 284 PetscFunctionReturn(PETSC_SUCCESS); 285 } 286 #endif 287 288 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 289 { 290 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 291 IS isicol; 292 const PetscInt *r, *ic, *ai = a->i, *aj = a->j, *ajtmp; 293 PetscInt i, n = A->rmap->n; 294 PetscInt *bi, *bj; 295 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 296 PetscReal f; 297 PetscInt nlnk, *lnk, k, **bi_ptr; 298 PetscFreeSpaceList free_space = NULL, current_space = NULL; 299 PetscBT lnkbt; 300 PetscBool missing; 301 302 PetscFunctionBegin; 303 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 304 PetscCall(MatMissingDiagonal(A, &missing, &i)); 305 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 306 307 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 308 PetscCall(ISGetIndices(isrow, &r)); 309 PetscCall(ISGetIndices(isicol, &ic)); 310 311 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 312 PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi)); 313 PetscCall(PetscMalloc1(n + 1, &bdiag)); 314 bi[0] = bdiag[0] = 0; 315 316 /* linked list for storing column indices of the active row */ 317 nlnk = n + 1; 318 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 319 320 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 321 322 /* initial FreeSpace size is f*(ai[n]+1) */ 323 f = info->fill; 324 if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 325 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 326 current_space = free_space; 327 328 for (i = 0; i < n; i++) { 329 /* copy previous fill into linked list */ 330 nzi = 0; 331 nnz = ai[r[i] + 1] - ai[r[i]]; 332 ajtmp = aj + ai[r[i]]; 333 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 334 nzi += nlnk; 335 336 /* add pivot rows into linked list */ 337 row = lnk[n]; 338 while (row < i) { 339 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 340 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 341 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 342 nzi += nlnk; 343 row = lnk[row]; 344 } 345 bi[i + 1] = bi[i] + nzi; 346 im[i] = nzi; 347 348 /* mark bdiag */ 349 nzbd = 0; 350 nnz = nzi; 351 k = lnk[n]; 352 while (nnz-- && k < i) { 353 nzbd++; 354 k = lnk[k]; 355 } 356 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 357 358 /* if free space is not available, make more free space */ 359 if (current_space->local_remaining < nzi) { 360 /* estimated additional space needed */ 361 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi)); 362 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 363 reallocs++; 364 } 365 366 /* copy data into free space, then initialize lnk */ 367 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 368 369 bi_ptr[i] = current_space->array; 370 current_space->array += nzi; 371 current_space->local_used += nzi; 372 current_space->local_remaining -= nzi; 373 } 374 375 PetscCall(ISRestoreIndices(isrow, &r)); 376 PetscCall(ISRestoreIndices(isicol, &ic)); 377 378 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 379 PetscCall(PetscShmgetAllocateArray(bi[n] + 1, sizeof(PetscInt), (void **)&bj)); 380 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 381 PetscCall(PetscLLDestroy(lnk, lnkbt)); 382 PetscCall(PetscFree2(bi_ptr, im)); 383 384 /* put together the new matrix */ 385 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 386 b = (Mat_SeqAIJ *)B->data; 387 b->free_ij = PETSC_TRUE; 388 PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a)); 389 b->free_a = PETSC_TRUE; 390 b->j = bj; 391 b->i = bi; 392 b->diag = bdiag; 393 b->ilen = NULL; 394 b->imax = NULL; 395 b->row = isrow; 396 b->col = iscol; 397 PetscCall(PetscObjectReference((PetscObject)isrow)); 398 PetscCall(PetscObjectReference((PetscObject)iscol)); 399 b->icol = isicol; 400 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 401 402 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 403 b->maxnz = b->nz = bdiag[0] + 1; 404 405 B->factortype = MAT_FACTOR_LU; 406 B->info.factor_mallocs = reallocs; 407 B->info.fill_ratio_given = f; 408 409 if (ai[n]) { 410 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 411 } else { 412 B->info.fill_ratio_needed = 0.0; 413 } 414 #if defined(PETSC_USE_INFO) 415 if (ai[n] != 0) { 416 PetscReal af = B->info.fill_ratio_needed; 417 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 418 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 419 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 420 PetscCall(PetscInfo(A, "for best performance.\n")); 421 } else { 422 PetscCall(PetscInfo(A, "Empty matrix\n")); 423 } 424 #endif 425 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 426 if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 427 PetscCall(MatSeqAIJCheckInode_FactorLU(B)); 428 PetscFunctionReturn(PETSC_SUCCESS); 429 } 430 431 /* 432 Trouble in factorization, should we dump the original matrix? 433 */ 434 PetscErrorCode MatFactorDumpMatrix(Mat A) 435 { 436 PetscBool flg = PETSC_FALSE; 437 438 PetscFunctionBegin; 439 PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL)); 440 if (flg) { 441 PetscViewer viewer; 442 char filename[PETSC_MAX_PATH_LEN]; 443 444 PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank)); 445 PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer)); 446 PetscCall(MatView(A, viewer)); 447 PetscCall(PetscViewerDestroy(&viewer)); 448 } 449 PetscFunctionReturn(PETSC_SUCCESS); 450 } 451 452 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) 453 { 454 Mat C = B; 455 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 456 IS isrow = b->row, isicol = b->icol; 457 const PetscInt *r, *ic, *ics; 458 const PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag; 459 PetscInt i, j, k, nz, nzL, row, *pj; 460 const PetscInt *ajtmp, *bjtmp; 461 MatScalar *rtmp, *pc, multiplier, *pv; 462 const MatScalar *aa = a->a, *v; 463 PetscBool row_identity, col_identity; 464 FactorShiftCtx sctx; 465 const PetscInt *ddiag; 466 PetscReal rs; 467 MatScalar d; 468 469 PetscFunctionBegin; 470 /* MatPivotSetUp(): initialize shift context sctx */ 471 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 472 473 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 474 ddiag = a->diag; 475 sctx.shift_top = info->zeropivot; 476 for (i = 0; i < n; i++) { 477 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 478 d = (aa)[ddiag[i]]; 479 rs = -PetscAbsScalar(d) - PetscRealPart(d); 480 v = aa + ai[i]; 481 nz = ai[i + 1] - ai[i]; 482 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 483 if (rs > sctx.shift_top) sctx.shift_top = rs; 484 } 485 sctx.shift_top *= 1.1; 486 sctx.nshift_max = 5; 487 sctx.shift_lo = 0.; 488 sctx.shift_hi = 1.; 489 } 490 491 PetscCall(ISGetIndices(isrow, &r)); 492 PetscCall(ISGetIndices(isicol, &ic)); 493 PetscCall(PetscMalloc1(n + 1, &rtmp)); 494 ics = ic; 495 496 do { 497 sctx.newshift = PETSC_FALSE; 498 for (i = 0; i < n; i++) { 499 /* zero rtmp */ 500 /* L part */ 501 nz = bi[i + 1] - bi[i]; 502 bjtmp = bj + bi[i]; 503 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 504 505 /* U part */ 506 nz = bdiag[i] - bdiag[i + 1]; 507 bjtmp = bj + bdiag[i + 1] + 1; 508 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 509 510 /* load in initial (unfactored row) */ 511 nz = ai[r[i] + 1] - ai[r[i]]; 512 ajtmp = aj + ai[r[i]]; 513 v = aa + ai[r[i]]; 514 for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 515 /* ZeropivotApply() */ 516 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 517 518 /* elimination */ 519 bjtmp = bj + bi[i]; 520 row = *bjtmp++; 521 nzL = bi[i + 1] - bi[i]; 522 for (k = 0; k < nzL; k++) { 523 pc = rtmp + row; 524 if (*pc != 0.0) { 525 pv = b->a + bdiag[row]; 526 multiplier = *pc * (*pv); 527 *pc = multiplier; 528 529 pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ 530 pv = b->a + bdiag[row + 1] + 1; 531 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */ 532 533 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 534 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 535 } 536 row = *bjtmp++; 537 } 538 539 /* finished row so stick it into b->a */ 540 rs = 0.0; 541 /* L part */ 542 pv = b->a + bi[i]; 543 pj = b->j + bi[i]; 544 nz = bi[i + 1] - bi[i]; 545 for (j = 0; j < nz; j++) { 546 pv[j] = rtmp[pj[j]]; 547 rs += PetscAbsScalar(pv[j]); 548 } 549 550 /* U part */ 551 pv = b->a + bdiag[i + 1] + 1; 552 pj = b->j + bdiag[i + 1] + 1; 553 nz = bdiag[i] - bdiag[i + 1] - 1; 554 for (j = 0; j < nz; j++) { 555 pv[j] = rtmp[pj[j]]; 556 rs += PetscAbsScalar(pv[j]); 557 } 558 559 sctx.rs = rs; 560 sctx.pv = rtmp[i]; 561 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 562 if (sctx.newshift) break; /* break for-loop */ 563 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 564 565 /* Mark diagonal and invert diagonal for simpler triangular solves */ 566 pv = b->a + bdiag[i]; 567 *pv = 1.0 / rtmp[i]; 568 569 } /* endof for (i=0; i<n; i++) { */ 570 571 /* MatPivotRefine() */ 572 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 573 /* 574 * if no shift in this attempt & shifting & started shifting & can refine, 575 * then try lower shift 576 */ 577 sctx.shift_hi = sctx.shift_fraction; 578 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 579 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 580 sctx.newshift = PETSC_TRUE; 581 sctx.nshift++; 582 } 583 } while (sctx.newshift); 584 585 PetscCall(PetscFree(rtmp)); 586 PetscCall(ISRestoreIndices(isicol, &ic)); 587 PetscCall(ISRestoreIndices(isrow, &r)); 588 589 PetscCall(ISIdentity(isrow, &row_identity)); 590 PetscCall(ISIdentity(isicol, &col_identity)); 591 if (b->inode.size) { 592 C->ops->solve = MatSolve_SeqAIJ_Inode; 593 } else if (row_identity && col_identity) { 594 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 595 } else { 596 C->ops->solve = MatSolve_SeqAIJ; 597 } 598 C->ops->solveadd = MatSolveAdd_SeqAIJ; 599 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 600 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 601 C->ops->matsolve = MatMatSolve_SeqAIJ; 602 C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ; 603 C->assembled = PETSC_TRUE; 604 C->preallocated = PETSC_TRUE; 605 606 PetscCall(PetscLogFlops(C->cmap->n)); 607 608 /* MatShiftView(A,info,&sctx) */ 609 if (sctx.nshift) { 610 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 611 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 612 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 613 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 614 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 615 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 616 } 617 } 618 PetscFunctionReturn(PETSC_SUCCESS); 619 } 620 621 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat); 622 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec); 623 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec); 624 625 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) 626 { 627 Mat C = B; 628 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 629 IS isrow = b->row, isicol = b->icol; 630 const PetscInt *r, *ic, *ics; 631 PetscInt nz, row, i, j, n = A->rmap->n, diag; 632 const PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 633 const PetscInt *ajtmp, *bjtmp, *diag_offset = b->diag, *pj; 634 MatScalar *pv, *rtmp, *pc, multiplier, d; 635 const MatScalar *v, *aa = a->a; 636 PetscReal rs = 0.0; 637 FactorShiftCtx sctx; 638 const PetscInt *ddiag; 639 PetscBool row_identity, col_identity; 640 641 PetscFunctionBegin; 642 /* MatPivotSetUp(): initialize shift context sctx */ 643 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 644 645 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 646 ddiag = a->diag; 647 sctx.shift_top = info->zeropivot; 648 for (i = 0; i < n; i++) { 649 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 650 d = (aa)[ddiag[i]]; 651 rs = -PetscAbsScalar(d) - PetscRealPart(d); 652 v = aa + ai[i]; 653 nz = ai[i + 1] - ai[i]; 654 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 655 if (rs > sctx.shift_top) sctx.shift_top = rs; 656 } 657 sctx.shift_top *= 1.1; 658 sctx.nshift_max = 5; 659 sctx.shift_lo = 0.; 660 sctx.shift_hi = 1.; 661 } 662 663 PetscCall(ISGetIndices(isrow, &r)); 664 PetscCall(ISGetIndices(isicol, &ic)); 665 PetscCall(PetscMalloc1(n + 1, &rtmp)); 666 ics = ic; 667 668 do { 669 sctx.newshift = PETSC_FALSE; 670 for (i = 0; i < n; i++) { 671 nz = bi[i + 1] - bi[i]; 672 bjtmp = bj + bi[i]; 673 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 674 675 /* load in initial (unfactored row) */ 676 nz = ai[r[i] + 1] - ai[r[i]]; 677 ajtmp = aj + ai[r[i]]; 678 v = aa + ai[r[i]]; 679 for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 680 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 681 682 row = *bjtmp++; 683 while (row < i) { 684 pc = rtmp + row; 685 if (*pc != 0.0) { 686 pv = b->a + diag_offset[row]; 687 pj = b->j + diag_offset[row] + 1; 688 multiplier = *pc / *pv++; 689 *pc = multiplier; 690 nz = bi[row + 1] - diag_offset[row] - 1; 691 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 692 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 693 } 694 row = *bjtmp++; 695 } 696 /* finished row so stick it into b->a */ 697 pv = b->a + bi[i]; 698 pj = b->j + bi[i]; 699 nz = bi[i + 1] - bi[i]; 700 diag = diag_offset[i] - bi[i]; 701 rs = 0.0; 702 for (j = 0; j < nz; j++) { 703 pv[j] = rtmp[pj[j]]; 704 rs += PetscAbsScalar(pv[j]); 705 } 706 rs -= PetscAbsScalar(pv[diag]); 707 708 sctx.rs = rs; 709 sctx.pv = pv[diag]; 710 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 711 if (sctx.newshift) break; 712 pv[diag] = sctx.pv; 713 } 714 715 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 716 /* 717 * if no shift in this attempt & shifting & started shifting & can refine, 718 * then try lower shift 719 */ 720 sctx.shift_hi = sctx.shift_fraction; 721 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 722 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 723 sctx.newshift = PETSC_TRUE; 724 sctx.nshift++; 725 } 726 } while (sctx.newshift); 727 728 /* invert diagonal entries for simpler triangular solves */ 729 for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]]; 730 PetscCall(PetscFree(rtmp)); 731 PetscCall(ISRestoreIndices(isicol, &ic)); 732 PetscCall(ISRestoreIndices(isrow, &r)); 733 734 PetscCall(ISIdentity(isrow, &row_identity)); 735 PetscCall(ISIdentity(isicol, &col_identity)); 736 if (row_identity && col_identity) { 737 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 738 } else { 739 C->ops->solve = MatSolve_SeqAIJ_inplace; 740 } 741 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 742 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 743 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 744 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 745 C->ops->matsolvetranspose = NULL; 746 747 C->assembled = PETSC_TRUE; 748 C->preallocated = PETSC_TRUE; 749 750 PetscCall(PetscLogFlops(C->cmap->n)); 751 if (sctx.nshift) { 752 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 753 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 754 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 755 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 756 } 757 } 758 C->ops->solve = MatSolve_SeqAIJ_inplace; 759 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 760 761 PetscCall(MatSeqAIJCheckInode(C)); 762 PetscFunctionReturn(PETSC_SUCCESS); 763 } 764 765 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec); 766 767 /* 768 This routine implements inplace ILU(0) with row or/and column permutations. 769 Input: 770 A - original matrix 771 Output; 772 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 773 a->j (col index) is permuted by the inverse of colperm, then sorted 774 a->a reordered accordingly with a->j 775 a->diag (ptr to diagonal elements) is updated. 776 */ 777 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info) 778 { 779 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 780 IS isrow = a->row, isicol = a->icol; 781 const PetscInt *r, *ic, *ics; 782 PetscInt i, j, n = A->rmap->n, *ai = a->i, *aj = a->j; 783 PetscInt *ajtmp, nz, row; 784 PetscInt *diag = a->diag, nbdiag, *pj; 785 PetscScalar *rtmp, *pc, multiplier, d; 786 MatScalar *pv, *v; 787 PetscReal rs; 788 FactorShiftCtx sctx; 789 const MatScalar *aa = a->a, *vtmp; 790 791 PetscFunctionBegin; 792 PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address"); 793 794 /* MatPivotSetUp(): initialize shift context sctx */ 795 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 796 797 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 798 const PetscInt *ddiag = a->diag; 799 sctx.shift_top = info->zeropivot; 800 for (i = 0; i < n; i++) { 801 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 802 d = (aa)[ddiag[i]]; 803 rs = -PetscAbsScalar(d) - PetscRealPart(d); 804 vtmp = aa + ai[i]; 805 nz = ai[i + 1] - ai[i]; 806 for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]); 807 if (rs > sctx.shift_top) sctx.shift_top = rs; 808 } 809 sctx.shift_top *= 1.1; 810 sctx.nshift_max = 5; 811 sctx.shift_lo = 0.; 812 sctx.shift_hi = 1.; 813 } 814 815 PetscCall(ISGetIndices(isrow, &r)); 816 PetscCall(ISGetIndices(isicol, &ic)); 817 PetscCall(PetscMalloc1(n + 1, &rtmp)); 818 PetscCall(PetscArrayzero(rtmp, n + 1)); 819 ics = ic; 820 821 #if defined(MV) 822 sctx.shift_top = 0.; 823 sctx.nshift_max = 0; 824 sctx.shift_lo = 0.; 825 sctx.shift_hi = 0.; 826 sctx.shift_fraction = 0.; 827 828 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 829 sctx.shift_top = 0.; 830 for (i = 0; i < n; i++) { 831 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 832 d = (a->a)[diag[i]]; 833 rs = -PetscAbsScalar(d) - PetscRealPart(d); 834 v = a->a + ai[i]; 835 nz = ai[i + 1] - ai[i]; 836 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 837 if (rs > sctx.shift_top) sctx.shift_top = rs; 838 } 839 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 840 sctx.shift_top *= 1.1; 841 sctx.nshift_max = 5; 842 sctx.shift_lo = 0.; 843 sctx.shift_hi = 1.; 844 } 845 846 sctx.shift_amount = 0.; 847 sctx.nshift = 0; 848 #endif 849 850 do { 851 sctx.newshift = PETSC_FALSE; 852 for (i = 0; i < n; i++) { 853 /* load in initial unfactored row */ 854 nz = ai[r[i] + 1] - ai[r[i]]; 855 ajtmp = aj + ai[r[i]]; 856 v = a->a + ai[r[i]]; 857 /* sort permuted ajtmp and values v accordingly */ 858 for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]]; 859 PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v)); 860 861 diag[r[i]] = ai[r[i]]; 862 for (j = 0; j < nz; j++) { 863 rtmp[ajtmp[j]] = v[j]; 864 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 865 } 866 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 867 868 row = *ajtmp++; 869 while (row < i) { 870 pc = rtmp + row; 871 if (*pc != 0.0) { 872 pv = a->a + diag[r[row]]; 873 pj = aj + diag[r[row]] + 1; 874 875 multiplier = *pc / *pv++; 876 *pc = multiplier; 877 nz = ai[r[row] + 1] - diag[r[row]] - 1; 878 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 879 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 880 } 881 row = *ajtmp++; 882 } 883 /* finished row so overwrite it onto a->a */ 884 pv = a->a + ai[r[i]]; 885 pj = aj + ai[r[i]]; 886 nz = ai[r[i] + 1] - ai[r[i]]; 887 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 888 889 rs = 0.0; 890 for (j = 0; j < nz; j++) { 891 pv[j] = rtmp[pj[j]]; 892 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 893 } 894 895 sctx.rs = rs; 896 sctx.pv = pv[nbdiag]; 897 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 898 if (sctx.newshift) break; 899 pv[nbdiag] = sctx.pv; 900 } 901 902 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 903 /* 904 * if no shift in this attempt & shifting & started shifting & can refine, 905 * then try lower shift 906 */ 907 sctx.shift_hi = sctx.shift_fraction; 908 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 909 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 910 sctx.newshift = PETSC_TRUE; 911 sctx.nshift++; 912 } 913 } while (sctx.newshift); 914 915 /* invert diagonal entries for simpler triangular solves */ 916 for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]]; 917 918 PetscCall(PetscFree(rtmp)); 919 PetscCall(ISRestoreIndices(isicol, &ic)); 920 PetscCall(ISRestoreIndices(isrow, &r)); 921 922 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 923 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 924 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 925 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 926 927 A->assembled = PETSC_TRUE; 928 A->preallocated = PETSC_TRUE; 929 930 PetscCall(PetscLogFlops(A->cmap->n)); 931 if (sctx.nshift) { 932 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 933 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 934 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 935 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 936 } 937 } 938 PetscFunctionReturn(PETSC_SUCCESS); 939 } 940 941 PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info) 942 { 943 Mat C; 944 945 PetscFunctionBegin; 946 PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C)); 947 PetscCall(MatLUFactorSymbolic(C, A, row, col, info)); 948 PetscCall(MatLUFactorNumeric(C, A, info)); 949 950 A->ops->solve = C->ops->solve; 951 A->ops->solvetranspose = C->ops->solvetranspose; 952 953 PetscCall(MatHeaderMerge(A, &C)); 954 PetscFunctionReturn(PETSC_SUCCESS); 955 } 956 957 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) 958 { 959 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 960 IS iscol = a->col, isrow = a->row; 961 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 962 PetscInt nz; 963 const PetscInt *rout, *cout, *r, *c; 964 PetscScalar *x, *tmp, *tmps, sum; 965 const PetscScalar *b; 966 const MatScalar *aa = a->a, *v; 967 968 PetscFunctionBegin; 969 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 970 971 PetscCall(VecGetArrayRead(bb, &b)); 972 PetscCall(VecGetArrayWrite(xx, &x)); 973 tmp = a->solve_work; 974 975 PetscCall(ISGetIndices(isrow, &rout)); 976 r = rout; 977 PetscCall(ISGetIndices(iscol, &cout)); 978 c = cout + (n - 1); 979 980 /* forward solve the lower triangular */ 981 tmp[0] = b[*r++]; 982 tmps = tmp; 983 for (i = 1; i < n; i++) { 984 v = aa + ai[i]; 985 vi = aj + ai[i]; 986 nz = a->diag[i] - ai[i]; 987 sum = b[*r++]; 988 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 989 tmp[i] = sum; 990 } 991 992 /* backward solve the upper triangular */ 993 for (i = n - 1; i >= 0; i--) { 994 v = aa + a->diag[i] + 1; 995 vi = aj + a->diag[i] + 1; 996 nz = ai[i + 1] - a->diag[i] - 1; 997 sum = tmp[i]; 998 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 999 x[*c--] = tmp[i] = sum * aa[a->diag[i]]; 1000 } 1001 1002 PetscCall(ISRestoreIndices(isrow, &rout)); 1003 PetscCall(ISRestoreIndices(iscol, &cout)); 1004 PetscCall(VecRestoreArrayRead(bb, &b)); 1005 PetscCall(VecRestoreArrayWrite(xx, &x)); 1006 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1007 PetscFunctionReturn(PETSC_SUCCESS); 1008 } 1009 1010 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X) 1011 { 1012 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1013 IS iscol = a->col, isrow = a->row; 1014 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 1015 PetscInt nz, neq, ldb, ldx; 1016 const PetscInt *rout, *cout, *r, *c; 1017 PetscScalar *x, *tmp = a->solve_work, *tmps, sum; 1018 const PetscScalar *b, *aa = a->a, *v; 1019 PetscBool isdense; 1020 1021 PetscFunctionBegin; 1022 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1023 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 1024 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1025 if (X != B) { 1026 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 1027 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1028 } 1029 PetscCall(MatDenseGetArrayRead(B, &b)); 1030 PetscCall(MatDenseGetLDA(B, &ldb)); 1031 PetscCall(MatDenseGetArray(X, &x)); 1032 PetscCall(MatDenseGetLDA(X, &ldx)); 1033 PetscCall(ISGetIndices(isrow, &rout)); 1034 r = rout; 1035 PetscCall(ISGetIndices(iscol, &cout)); 1036 c = cout; 1037 for (neq = 0; neq < B->cmap->n; neq++) { 1038 /* forward solve the lower triangular */ 1039 tmp[0] = b[r[0]]; 1040 tmps = tmp; 1041 for (i = 1; i < n; i++) { 1042 v = aa + ai[i]; 1043 vi = aj + ai[i]; 1044 nz = a->diag[i] - ai[i]; 1045 sum = b[r[i]]; 1046 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1047 tmp[i] = sum; 1048 } 1049 /* backward solve the upper triangular */ 1050 for (i = n - 1; i >= 0; i--) { 1051 v = aa + a->diag[i] + 1; 1052 vi = aj + a->diag[i] + 1; 1053 nz = ai[i + 1] - a->diag[i] - 1; 1054 sum = tmp[i]; 1055 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1056 x[c[i]] = tmp[i] = sum * aa[a->diag[i]]; 1057 } 1058 b += ldb; 1059 x += ldx; 1060 } 1061 PetscCall(ISRestoreIndices(isrow, &rout)); 1062 PetscCall(ISRestoreIndices(iscol, &cout)); 1063 PetscCall(MatDenseRestoreArrayRead(B, &b)); 1064 PetscCall(MatDenseRestoreArray(X, &x)); 1065 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 1066 PetscFunctionReturn(PETSC_SUCCESS); 1067 } 1068 1069 PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X) 1070 { 1071 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1072 IS iscol = a->col, isrow = a->row; 1073 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 1074 PetscInt nz, neq, ldb, ldx; 1075 const PetscInt *rout, *cout, *r, *c; 1076 PetscScalar *x, *tmp = a->solve_work, sum; 1077 const PetscScalar *b, *aa = a->a, *v; 1078 PetscBool isdense; 1079 1080 PetscFunctionBegin; 1081 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1082 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 1083 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1084 if (X != B) { 1085 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 1086 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1087 } 1088 PetscCall(MatDenseGetArrayRead(B, &b)); 1089 PetscCall(MatDenseGetLDA(B, &ldb)); 1090 PetscCall(MatDenseGetArray(X, &x)); 1091 PetscCall(MatDenseGetLDA(X, &ldx)); 1092 PetscCall(ISGetIndices(isrow, &rout)); 1093 r = rout; 1094 PetscCall(ISGetIndices(iscol, &cout)); 1095 c = cout; 1096 for (neq = 0; neq < B->cmap->n; neq++) { 1097 /* forward solve the lower triangular */ 1098 tmp[0] = b[r[0]]; 1099 v = aa; 1100 vi = aj; 1101 for (i = 1; i < n; i++) { 1102 nz = ai[i + 1] - ai[i]; 1103 sum = b[r[i]]; 1104 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 1105 tmp[i] = sum; 1106 v += nz; 1107 vi += nz; 1108 } 1109 /* backward solve the upper triangular */ 1110 for (i = n - 1; i >= 0; i--) { 1111 v = aa + adiag[i + 1] + 1; 1112 vi = aj + adiag[i + 1] + 1; 1113 nz = adiag[i] - adiag[i + 1] - 1; 1114 sum = tmp[i]; 1115 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 1116 x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 1117 } 1118 b += ldb; 1119 x += ldx; 1120 } 1121 PetscCall(ISRestoreIndices(isrow, &rout)); 1122 PetscCall(ISRestoreIndices(iscol, &cout)); 1123 PetscCall(MatDenseRestoreArrayRead(B, &b)); 1124 PetscCall(MatDenseRestoreArray(X, &x)); 1125 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 1126 PetscFunctionReturn(PETSC_SUCCESS); 1127 } 1128 1129 PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X) 1130 { 1131 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1132 IS iscol = a->col, isrow = a->row; 1133 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, j; 1134 PetscInt nz, neq, ldb, ldx; 1135 const PetscInt *rout, *cout, *r, *c; 1136 PetscScalar *x, *tmp = a->solve_work, s1; 1137 const PetscScalar *b, *aa = a->a, *v; 1138 PetscBool isdense; 1139 1140 PetscFunctionBegin; 1141 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1142 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 1143 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1144 if (X != B) { 1145 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 1146 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1147 } 1148 PetscCall(MatDenseGetArrayRead(B, &b)); 1149 PetscCall(MatDenseGetLDA(B, &ldb)); 1150 PetscCall(MatDenseGetArray(X, &x)); 1151 PetscCall(MatDenseGetLDA(X, &ldx)); 1152 PetscCall(ISGetIndices(isrow, &rout)); 1153 r = rout; 1154 PetscCall(ISGetIndices(iscol, &cout)); 1155 c = cout; 1156 for (neq = 0; neq < B->cmap->n; neq++) { 1157 /* copy the b into temp work space according to permutation */ 1158 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1159 1160 /* forward solve the U^T */ 1161 for (i = 0; i < n; i++) { 1162 v = aa + adiag[i + 1] + 1; 1163 vi = aj + adiag[i + 1] + 1; 1164 nz = adiag[i] - adiag[i + 1] - 1; 1165 s1 = tmp[i]; 1166 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1167 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1168 tmp[i] = s1; 1169 } 1170 1171 /* backward solve the L^T */ 1172 for (i = n - 1; i >= 0; i--) { 1173 v = aa + ai[i]; 1174 vi = aj + ai[i]; 1175 nz = ai[i + 1] - ai[i]; 1176 s1 = tmp[i]; 1177 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1178 } 1179 1180 /* copy tmp into x according to permutation */ 1181 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1182 b += ldb; 1183 x += ldx; 1184 } 1185 PetscCall(ISRestoreIndices(isrow, &rout)); 1186 PetscCall(ISRestoreIndices(iscol, &cout)); 1187 PetscCall(MatDenseRestoreArrayRead(B, &b)); 1188 PetscCall(MatDenseRestoreArray(X, &x)); 1189 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 1190 PetscFunctionReturn(PETSC_SUCCESS); 1191 } 1192 1193 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx) 1194 { 1195 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1196 IS iscol = a->col, isrow = a->row; 1197 const PetscInt *r, *c, *rout, *cout; 1198 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 1199 PetscInt nz, row; 1200 PetscScalar *x, *tmp, *tmps, sum; 1201 const PetscScalar *b; 1202 const MatScalar *aa = a->a, *v; 1203 1204 PetscFunctionBegin; 1205 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1206 1207 PetscCall(VecGetArrayRead(bb, &b)); 1208 PetscCall(VecGetArrayWrite(xx, &x)); 1209 tmp = a->solve_work; 1210 1211 PetscCall(ISGetIndices(isrow, &rout)); 1212 r = rout; 1213 PetscCall(ISGetIndices(iscol, &cout)); 1214 c = cout + (n - 1); 1215 1216 /* forward solve the lower triangular */ 1217 tmp[0] = b[*r++]; 1218 tmps = tmp; 1219 for (row = 1; row < n; row++) { 1220 i = rout[row]; /* permuted row */ 1221 v = aa + ai[i]; 1222 vi = aj + ai[i]; 1223 nz = a->diag[i] - ai[i]; 1224 sum = b[*r++]; 1225 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1226 tmp[row] = sum; 1227 } 1228 1229 /* backward solve the upper triangular */ 1230 for (row = n - 1; row >= 0; row--) { 1231 i = rout[row]; /* permuted row */ 1232 v = aa + a->diag[i] + 1; 1233 vi = aj + a->diag[i] + 1; 1234 nz = ai[i + 1] - a->diag[i] - 1; 1235 sum = tmp[row]; 1236 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1237 x[*c--] = tmp[row] = sum * aa[a->diag[i]]; 1238 } 1239 1240 PetscCall(ISRestoreIndices(isrow, &rout)); 1241 PetscCall(ISRestoreIndices(iscol, &cout)); 1242 PetscCall(VecRestoreArrayRead(bb, &b)); 1243 PetscCall(VecRestoreArrayWrite(xx, &x)); 1244 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1245 PetscFunctionReturn(PETSC_SUCCESS); 1246 } 1247 1248 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1249 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) 1250 { 1251 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1252 PetscInt n = A->rmap->n; 1253 const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag; 1254 PetscScalar *x; 1255 const PetscScalar *b; 1256 const MatScalar *aa = a->a; 1257 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1258 PetscInt adiag_i, i, nz, ai_i; 1259 const PetscInt *vi; 1260 const MatScalar *v; 1261 PetscScalar sum; 1262 #endif 1263 1264 PetscFunctionBegin; 1265 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1266 1267 PetscCall(VecGetArrayRead(bb, &b)); 1268 PetscCall(VecGetArrayWrite(xx, &x)); 1269 1270 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1271 fortransolveaij_(&n, x, ai, aj, adiag, aa, b); 1272 #else 1273 /* forward solve the lower triangular */ 1274 x[0] = b[0]; 1275 for (i = 1; i < n; i++) { 1276 ai_i = ai[i]; 1277 v = aa + ai_i; 1278 vi = aj + ai_i; 1279 nz = adiag[i] - ai_i; 1280 sum = b[i]; 1281 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1282 x[i] = sum; 1283 } 1284 1285 /* backward solve the upper triangular */ 1286 for (i = n - 1; i >= 0; i--) { 1287 adiag_i = adiag[i]; 1288 v = aa + adiag_i + 1; 1289 vi = aj + adiag_i + 1; 1290 nz = ai[i + 1] - adiag_i - 1; 1291 sum = x[i]; 1292 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1293 x[i] = sum * aa[adiag_i]; 1294 } 1295 #endif 1296 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1297 PetscCall(VecRestoreArrayRead(bb, &b)); 1298 PetscCall(VecRestoreArrayWrite(xx, &x)); 1299 PetscFunctionReturn(PETSC_SUCCESS); 1300 } 1301 1302 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx) 1303 { 1304 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1305 IS iscol = a->col, isrow = a->row; 1306 PetscInt i, n = A->rmap->n, j; 1307 PetscInt nz; 1308 const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j; 1309 PetscScalar *x, *tmp, sum; 1310 const PetscScalar *b; 1311 const MatScalar *aa = a->a, *v; 1312 1313 PetscFunctionBegin; 1314 if (yy != xx) PetscCall(VecCopy(yy, xx)); 1315 1316 PetscCall(VecGetArrayRead(bb, &b)); 1317 PetscCall(VecGetArray(xx, &x)); 1318 tmp = a->solve_work; 1319 1320 PetscCall(ISGetIndices(isrow, &rout)); 1321 r = rout; 1322 PetscCall(ISGetIndices(iscol, &cout)); 1323 c = cout + (n - 1); 1324 1325 /* forward solve the lower triangular */ 1326 tmp[0] = b[*r++]; 1327 for (i = 1; i < n; i++) { 1328 v = aa + ai[i]; 1329 vi = aj + ai[i]; 1330 nz = a->diag[i] - ai[i]; 1331 sum = b[*r++]; 1332 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1333 tmp[i] = sum; 1334 } 1335 1336 /* backward solve the upper triangular */ 1337 for (i = n - 1; i >= 0; i--) { 1338 v = aa + a->diag[i] + 1; 1339 vi = aj + a->diag[i] + 1; 1340 nz = ai[i + 1] - a->diag[i] - 1; 1341 sum = tmp[i]; 1342 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1343 tmp[i] = sum * aa[a->diag[i]]; 1344 x[*c--] += tmp[i]; 1345 } 1346 1347 PetscCall(ISRestoreIndices(isrow, &rout)); 1348 PetscCall(ISRestoreIndices(iscol, &cout)); 1349 PetscCall(VecRestoreArrayRead(bb, &b)); 1350 PetscCall(VecRestoreArray(xx, &x)); 1351 PetscCall(PetscLogFlops(2.0 * a->nz)); 1352 PetscFunctionReturn(PETSC_SUCCESS); 1353 } 1354 1355 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx) 1356 { 1357 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1358 IS iscol = a->col, isrow = a->row; 1359 PetscInt i, n = A->rmap->n, j; 1360 PetscInt nz; 1361 const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 1362 PetscScalar *x, *tmp, sum; 1363 const PetscScalar *b; 1364 const MatScalar *aa = a->a, *v; 1365 1366 PetscFunctionBegin; 1367 if (yy != xx) PetscCall(VecCopy(yy, xx)); 1368 1369 PetscCall(VecGetArrayRead(bb, &b)); 1370 PetscCall(VecGetArray(xx, &x)); 1371 tmp = a->solve_work; 1372 1373 PetscCall(ISGetIndices(isrow, &rout)); 1374 r = rout; 1375 PetscCall(ISGetIndices(iscol, &cout)); 1376 c = cout; 1377 1378 /* forward solve the lower triangular */ 1379 tmp[0] = b[r[0]]; 1380 v = aa; 1381 vi = aj; 1382 for (i = 1; i < n; i++) { 1383 nz = ai[i + 1] - ai[i]; 1384 sum = b[r[i]]; 1385 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1386 tmp[i] = sum; 1387 v += nz; 1388 vi += nz; 1389 } 1390 1391 /* backward solve the upper triangular */ 1392 v = aa + adiag[n - 1]; 1393 vi = aj + adiag[n - 1]; 1394 for (i = n - 1; i >= 0; i--) { 1395 nz = adiag[i] - adiag[i + 1] - 1; 1396 sum = tmp[i]; 1397 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1398 tmp[i] = sum * v[nz]; 1399 x[c[i]] += tmp[i]; 1400 v += nz + 1; 1401 vi += nz + 1; 1402 } 1403 1404 PetscCall(ISRestoreIndices(isrow, &rout)); 1405 PetscCall(ISRestoreIndices(iscol, &cout)); 1406 PetscCall(VecRestoreArrayRead(bb, &b)); 1407 PetscCall(VecRestoreArray(xx, &x)); 1408 PetscCall(PetscLogFlops(2.0 * a->nz)); 1409 PetscFunctionReturn(PETSC_SUCCESS); 1410 } 1411 1412 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) 1413 { 1414 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1415 IS iscol = a->col, isrow = a->row; 1416 const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 1417 PetscInt i, n = A->rmap->n, j; 1418 PetscInt nz; 1419 PetscScalar *x, *tmp, s1; 1420 const MatScalar *aa = a->a, *v; 1421 const PetscScalar *b; 1422 1423 PetscFunctionBegin; 1424 PetscCall(VecGetArrayRead(bb, &b)); 1425 PetscCall(VecGetArrayWrite(xx, &x)); 1426 tmp = a->solve_work; 1427 1428 PetscCall(ISGetIndices(isrow, &rout)); 1429 r = rout; 1430 PetscCall(ISGetIndices(iscol, &cout)); 1431 c = cout; 1432 1433 /* copy the b into temp work space according to permutation */ 1434 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1435 1436 /* forward solve the U^T */ 1437 for (i = 0; i < n; i++) { 1438 v = aa + diag[i]; 1439 vi = aj + diag[i] + 1; 1440 nz = ai[i + 1] - diag[i] - 1; 1441 s1 = tmp[i]; 1442 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1443 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1444 tmp[i] = s1; 1445 } 1446 1447 /* backward solve the L^T */ 1448 for (i = n - 1; i >= 0; i--) { 1449 v = aa + diag[i] - 1; 1450 vi = aj + diag[i] - 1; 1451 nz = diag[i] - ai[i]; 1452 s1 = tmp[i]; 1453 for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1454 } 1455 1456 /* copy tmp into x according to permutation */ 1457 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1458 1459 PetscCall(ISRestoreIndices(isrow, &rout)); 1460 PetscCall(ISRestoreIndices(iscol, &cout)); 1461 PetscCall(VecRestoreArrayRead(bb, &b)); 1462 PetscCall(VecRestoreArrayWrite(xx, &x)); 1463 1464 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1465 PetscFunctionReturn(PETSC_SUCCESS); 1466 } 1467 1468 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx) 1469 { 1470 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1471 IS iscol = a->col, isrow = a->row; 1472 const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi; 1473 PetscInt i, n = A->rmap->n, j; 1474 PetscInt nz; 1475 PetscScalar *x, *tmp, s1; 1476 const MatScalar *aa = a->a, *v; 1477 const PetscScalar *b; 1478 1479 PetscFunctionBegin; 1480 PetscCall(VecGetArrayRead(bb, &b)); 1481 PetscCall(VecGetArrayWrite(xx, &x)); 1482 tmp = a->solve_work; 1483 1484 PetscCall(ISGetIndices(isrow, &rout)); 1485 r = rout; 1486 PetscCall(ISGetIndices(iscol, &cout)); 1487 c = cout; 1488 1489 /* copy the b into temp work space according to permutation */ 1490 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1491 1492 /* forward solve the U^T */ 1493 for (i = 0; i < n; i++) { 1494 v = aa + adiag[i + 1] + 1; 1495 vi = aj + adiag[i + 1] + 1; 1496 nz = adiag[i] - adiag[i + 1] - 1; 1497 s1 = tmp[i]; 1498 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1499 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1500 tmp[i] = s1; 1501 } 1502 1503 /* backward solve the L^T */ 1504 for (i = n - 1; i >= 0; i--) { 1505 v = aa + ai[i]; 1506 vi = aj + ai[i]; 1507 nz = ai[i + 1] - ai[i]; 1508 s1 = tmp[i]; 1509 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1510 } 1511 1512 /* copy tmp into x according to permutation */ 1513 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1514 1515 PetscCall(ISRestoreIndices(isrow, &rout)); 1516 PetscCall(ISRestoreIndices(iscol, &cout)); 1517 PetscCall(VecRestoreArrayRead(bb, &b)); 1518 PetscCall(VecRestoreArrayWrite(xx, &x)); 1519 1520 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1521 PetscFunctionReturn(PETSC_SUCCESS); 1522 } 1523 1524 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx) 1525 { 1526 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1527 IS iscol = a->col, isrow = a->row; 1528 const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 1529 PetscInt i, n = A->rmap->n, j; 1530 PetscInt nz; 1531 PetscScalar *x, *tmp, s1; 1532 const MatScalar *aa = a->a, *v; 1533 const PetscScalar *b; 1534 1535 PetscFunctionBegin; 1536 if (zz != xx) PetscCall(VecCopy(zz, xx)); 1537 PetscCall(VecGetArrayRead(bb, &b)); 1538 PetscCall(VecGetArray(xx, &x)); 1539 tmp = a->solve_work; 1540 1541 PetscCall(ISGetIndices(isrow, &rout)); 1542 r = rout; 1543 PetscCall(ISGetIndices(iscol, &cout)); 1544 c = cout; 1545 1546 /* copy the b into temp work space according to permutation */ 1547 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1548 1549 /* forward solve the U^T */ 1550 for (i = 0; i < n; i++) { 1551 v = aa + diag[i]; 1552 vi = aj + diag[i] + 1; 1553 nz = ai[i + 1] - diag[i] - 1; 1554 s1 = tmp[i]; 1555 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1556 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1557 tmp[i] = s1; 1558 } 1559 1560 /* backward solve the L^T */ 1561 for (i = n - 1; i >= 0; i--) { 1562 v = aa + diag[i] - 1; 1563 vi = aj + diag[i] - 1; 1564 nz = diag[i] - ai[i]; 1565 s1 = tmp[i]; 1566 for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1567 } 1568 1569 /* copy tmp into x according to permutation */ 1570 for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1571 1572 PetscCall(ISRestoreIndices(isrow, &rout)); 1573 PetscCall(ISRestoreIndices(iscol, &cout)); 1574 PetscCall(VecRestoreArrayRead(bb, &b)); 1575 PetscCall(VecRestoreArray(xx, &x)); 1576 1577 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1578 PetscFunctionReturn(PETSC_SUCCESS); 1579 } 1580 1581 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx) 1582 { 1583 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1584 IS iscol = a->col, isrow = a->row; 1585 const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi; 1586 PetscInt i, n = A->rmap->n, j; 1587 PetscInt nz; 1588 PetscScalar *x, *tmp, s1; 1589 const MatScalar *aa = a->a, *v; 1590 const PetscScalar *b; 1591 1592 PetscFunctionBegin; 1593 if (zz != xx) PetscCall(VecCopy(zz, xx)); 1594 PetscCall(VecGetArrayRead(bb, &b)); 1595 PetscCall(VecGetArray(xx, &x)); 1596 tmp = a->solve_work; 1597 1598 PetscCall(ISGetIndices(isrow, &rout)); 1599 r = rout; 1600 PetscCall(ISGetIndices(iscol, &cout)); 1601 c = cout; 1602 1603 /* copy the b into temp work space according to permutation */ 1604 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1605 1606 /* forward solve the U^T */ 1607 for (i = 0; i < n; i++) { 1608 v = aa + adiag[i + 1] + 1; 1609 vi = aj + adiag[i + 1] + 1; 1610 nz = adiag[i] - adiag[i + 1] - 1; 1611 s1 = tmp[i]; 1612 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1613 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1614 tmp[i] = s1; 1615 } 1616 1617 /* backward solve the L^T */ 1618 for (i = n - 1; i >= 0; i--) { 1619 v = aa + ai[i]; 1620 vi = aj + ai[i]; 1621 nz = ai[i + 1] - ai[i]; 1622 s1 = tmp[i]; 1623 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1624 } 1625 1626 /* copy tmp into x according to permutation */ 1627 for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1628 1629 PetscCall(ISRestoreIndices(isrow, &rout)); 1630 PetscCall(ISRestoreIndices(iscol, &cout)); 1631 PetscCall(VecRestoreArrayRead(bb, &b)); 1632 PetscCall(VecRestoreArray(xx, &x)); 1633 1634 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1635 PetscFunctionReturn(PETSC_SUCCESS); 1636 } 1637 1638 /* 1639 ilu() under revised new data structure. 1640 Factored arrays bj and ba are stored as 1641 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1642 1643 bi=fact->i is an array of size n+1, in which 1644 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1645 bi[n]: points to L(n-1,n-1)+1 1646 1647 bdiag=fact->diag is an array of size n+1,in which 1648 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1649 bdiag[n]: points to entry of U(n-1,0)-1 1650 1651 U(i,:) contains bdiag[i] as its last entry, i.e., 1652 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1653 */ 1654 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1655 { 1656 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1657 const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag; 1658 PetscInt i, j, k = 0, nz, *bi, *bj, *bdiag; 1659 IS isicol; 1660 1661 PetscFunctionBegin; 1662 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1663 PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE)); 1664 b = (Mat_SeqAIJ *)fact->data; 1665 1666 /* allocate matrix arrays for new data structure */ 1667 PetscCall(PetscShmgetAllocateArray(ai[n] + 1, sizeof(PetscScalar), (void **)&b->a)); 1668 PetscCall(PetscShmgetAllocateArray(ai[n] + 1, sizeof(PetscInt), (void **)&b->j)); 1669 PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&b->i)); 1670 if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n])); 1671 b->free_a = PETSC_TRUE; 1672 b->free_ij = PETSC_TRUE; 1673 1674 if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag)); 1675 bdiag = b->diag; 1676 1677 /* set bi and bj with new data structure */ 1678 bi = b->i; 1679 bj = b->j; 1680 1681 /* L part */ 1682 bi[0] = 0; 1683 for (i = 0; i < n; i++) { 1684 nz = adiag[i] - ai[i]; 1685 bi[i + 1] = bi[i] + nz; 1686 aj = a->j + ai[i]; 1687 for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1688 } 1689 1690 /* U part */ 1691 bdiag[n] = bi[n] - 1; 1692 for (i = n - 1; i >= 0; i--) { 1693 nz = ai[i + 1] - adiag[i] - 1; 1694 aj = a->j + adiag[i] + 1; 1695 for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1696 /* diag[i] */ 1697 bj[k++] = i; 1698 bdiag[i] = bdiag[i + 1] + nz + 1; 1699 } 1700 1701 fact->factortype = MAT_FACTOR_ILU; 1702 fact->info.factor_mallocs = 0; 1703 fact->info.fill_ratio_given = info->fill; 1704 fact->info.fill_ratio_needed = 1.0; 1705 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1706 PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1707 1708 b = (Mat_SeqAIJ *)fact->data; 1709 b->row = isrow; 1710 b->col = iscol; 1711 b->icol = isicol; 1712 PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work)); 1713 PetscCall(PetscObjectReference((PetscObject)isrow)); 1714 PetscCall(PetscObjectReference((PetscObject)iscol)); 1715 PetscFunctionReturn(PETSC_SUCCESS); 1716 } 1717 1718 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1719 { 1720 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1721 IS isicol; 1722 const PetscInt *r, *ic; 1723 PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 1724 PetscInt *bi, *cols, nnz, *cols_lvl; 1725 PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 1726 PetscInt i, levels, diagonal_fill; 1727 PetscBool col_identity, row_identity, missing; 1728 PetscReal f; 1729 PetscInt nlnk, *lnk, *lnk_lvl = NULL; 1730 PetscBT lnkbt; 1731 PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 1732 PetscFreeSpaceList free_space = NULL, current_space = NULL; 1733 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1734 1735 PetscFunctionBegin; 1736 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 1737 PetscCall(MatMissingDiagonal(A, &missing, &i)); 1738 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 1739 1740 levels = (PetscInt)info->levels; 1741 PetscCall(ISIdentity(isrow, &row_identity)); 1742 PetscCall(ISIdentity(iscol, &col_identity)); 1743 if (!levels && row_identity && col_identity) { 1744 /* special case: ilu(0) with natural ordering */ 1745 PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info)); 1746 if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1747 PetscFunctionReturn(PETSC_SUCCESS); 1748 } 1749 1750 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1751 PetscCall(ISGetIndices(isrow, &r)); 1752 PetscCall(ISGetIndices(isicol, &ic)); 1753 1754 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1755 PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi)); 1756 PetscCall(PetscMalloc1(n + 1, &bdiag)); 1757 bi[0] = bdiag[0] = 0; 1758 PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 1759 1760 /* create a linked list for storing column indices of the active row */ 1761 nlnk = n + 1; 1762 PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 1763 1764 /* initial FreeSpace size is f*(ai[n]+1) */ 1765 f = info->fill; 1766 diagonal_fill = (PetscInt)info->diagonal_fill; 1767 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1768 current_space = free_space; 1769 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 1770 current_space_lvl = free_space_lvl; 1771 for (i = 0; i < n; i++) { 1772 nzi = 0; 1773 /* copy current row into linked list */ 1774 nnz = ai[r[i] + 1] - ai[r[i]]; 1775 PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 1776 cols = aj + ai[r[i]]; 1777 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1778 PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 1779 nzi += nlnk; 1780 1781 /* make sure diagonal entry is included */ 1782 if (diagonal_fill && lnk[i] == -1) { 1783 fm = n; 1784 while (lnk[fm] < i) fm = lnk[fm]; 1785 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1786 lnk[fm] = i; 1787 lnk_lvl[i] = 0; 1788 nzi++; 1789 dcount++; 1790 } 1791 1792 /* add pivot rows into the active row */ 1793 nzbd = 0; 1794 prow = lnk[n]; 1795 while (prow < i) { 1796 nnz = bdiag[prow]; 1797 cols = bj_ptr[prow] + nnz + 1; 1798 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1799 nnz = bi[prow + 1] - bi[prow] - nnz - 1; 1800 PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 1801 nzi += nlnk; 1802 prow = lnk[prow]; 1803 nzbd++; 1804 } 1805 bdiag[i] = nzbd; 1806 bi[i + 1] = bi[i] + nzi; 1807 /* if free space is not available, make more free space */ 1808 if (current_space->local_remaining < nzi) { 1809 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */ 1810 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 1811 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 1812 reallocs++; 1813 } 1814 1815 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1816 PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 1817 bj_ptr[i] = current_space->array; 1818 bjlvl_ptr[i] = current_space_lvl->array; 1819 1820 /* make sure the active row i has diagonal entry */ 1821 PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 1822 1823 current_space->array += nzi; 1824 current_space->local_used += nzi; 1825 current_space->local_remaining -= nzi; 1826 current_space_lvl->array += nzi; 1827 current_space_lvl->local_used += nzi; 1828 current_space_lvl->local_remaining -= nzi; 1829 } 1830 1831 PetscCall(ISRestoreIndices(isrow, &r)); 1832 PetscCall(ISRestoreIndices(isicol, &ic)); 1833 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1834 PetscCall(PetscShmgetAllocateArray(bi[n] + 1, sizeof(PetscInt), (void **)&bj)); 1835 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 1836 1837 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 1838 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 1839 PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 1840 1841 #if defined(PETSC_USE_INFO) 1842 { 1843 PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 1844 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 1845 PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 1846 PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 1847 PetscCall(PetscInfo(A, "for best performance.\n")); 1848 if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 1849 } 1850 #endif 1851 /* put together the new matrix */ 1852 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 1853 b = (Mat_SeqAIJ *)fact->data; 1854 b->free_ij = PETSC_TRUE; 1855 PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a)); 1856 b->free_a = PETSC_TRUE; 1857 b->j = bj; 1858 b->i = bi; 1859 b->diag = bdiag; 1860 b->ilen = NULL; 1861 b->imax = NULL; 1862 b->row = isrow; 1863 b->col = iscol; 1864 PetscCall(PetscObjectReference((PetscObject)isrow)); 1865 PetscCall(PetscObjectReference((PetscObject)iscol)); 1866 b->icol = isicol; 1867 1868 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 1869 /* In b structure: Free imax, ilen, old a, old j. 1870 Allocate bdiag, solve_work, new a, new j */ 1871 b->maxnz = b->nz = bdiag[0] + 1; 1872 1873 fact->info.factor_mallocs = reallocs; 1874 fact->info.fill_ratio_given = f; 1875 fact->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 1876 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1877 if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1878 PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1879 PetscFunctionReturn(PETSC_SUCCESS); 1880 } 1881 1882 #if 0 1883 // unused 1884 static PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1885 { 1886 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1887 IS isicol; 1888 const PetscInt *r, *ic; 1889 PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 1890 PetscInt *bi, *cols, nnz, *cols_lvl; 1891 PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 1892 PetscInt i, levels, diagonal_fill; 1893 PetscBool col_identity, row_identity; 1894 PetscReal f; 1895 PetscInt nlnk, *lnk, *lnk_lvl = NULL; 1896 PetscBT lnkbt; 1897 PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 1898 PetscFreeSpaceList free_space = NULL, current_space = NULL; 1899 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1900 PetscBool missing; 1901 1902 PetscFunctionBegin; 1903 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 1904 PetscCall(MatMissingDiagonal(A, &missing, &i)); 1905 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 1906 1907 f = info->fill; 1908 levels = (PetscInt)info->levels; 1909 diagonal_fill = (PetscInt)info->diagonal_fill; 1910 1911 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1912 1913 PetscCall(ISIdentity(isrow, &row_identity)); 1914 PetscCall(ISIdentity(iscol, &col_identity)); 1915 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1916 PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE)); 1917 1918 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1919 if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1920 fact->factortype = MAT_FACTOR_ILU; 1921 (fact)->info.factor_mallocs = 0; 1922 (fact)->info.fill_ratio_given = info->fill; 1923 (fact)->info.fill_ratio_needed = 1.0; 1924 1925 b = (Mat_SeqAIJ *)fact->data; 1926 b->row = isrow; 1927 b->col = iscol; 1928 b->icol = isicol; 1929 PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work)); 1930 PetscCall(PetscObjectReference((PetscObject)isrow)); 1931 PetscCall(PetscObjectReference((PetscObject)iscol)); 1932 PetscFunctionReturn(PETSC_SUCCESS); 1933 } 1934 1935 PetscCall(ISGetIndices(isrow, &r)); 1936 PetscCall(ISGetIndices(isicol, &ic)); 1937 1938 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1939 PetscCall(PetscShmgetAllocateArray(n + 1,sizeof(PetscInt),(void **)&bi)); 1940 PetscCall(PetscMalloc1(n + 1, &bdiag)); 1941 bi[0] = bdiag[0] = 0; 1942 1943 PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 1944 1945 /* create a linked list for storing column indices of the active row */ 1946 nlnk = n + 1; 1947 PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 1948 1949 /* initial FreeSpace size is f*(ai[n]+1) */ 1950 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1951 current_space = free_space; 1952 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 1953 current_space_lvl = free_space_lvl; 1954 1955 for (i = 0; i < n; i++) { 1956 nzi = 0; 1957 /* copy current row into linked list */ 1958 nnz = ai[r[i] + 1] - ai[r[i]]; 1959 PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 1960 cols = aj + ai[r[i]]; 1961 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1962 PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 1963 nzi += nlnk; 1964 1965 /* make sure diagonal entry is included */ 1966 if (diagonal_fill && lnk[i] == -1) { 1967 fm = n; 1968 while (lnk[fm] < i) fm = lnk[fm]; 1969 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1970 lnk[fm] = i; 1971 lnk_lvl[i] = 0; 1972 nzi++; 1973 dcount++; 1974 } 1975 1976 /* add pivot rows into the active row */ 1977 nzbd = 0; 1978 prow = lnk[n]; 1979 while (prow < i) { 1980 nnz = bdiag[prow]; 1981 cols = bj_ptr[prow] + nnz + 1; 1982 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1983 nnz = bi[prow + 1] - bi[prow] - nnz - 1; 1984 PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 1985 nzi += nlnk; 1986 prow = lnk[prow]; 1987 nzbd++; 1988 } 1989 bdiag[i] = nzbd; 1990 bi[i + 1] = bi[i] + nzi; 1991 1992 /* if free space is not available, make more free space */ 1993 if (current_space->local_remaining < nzi) { 1994 nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */ 1995 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 1996 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 1997 reallocs++; 1998 } 1999 2000 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2001 PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 2002 bj_ptr[i] = current_space->array; 2003 bjlvl_ptr[i] = current_space_lvl->array; 2004 2005 /* make sure the active row i has diagonal entry */ 2006 PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 2007 2008 current_space->array += nzi; 2009 current_space->local_used += nzi; 2010 current_space->local_remaining -= nzi; 2011 current_space_lvl->array += nzi; 2012 current_space_lvl->local_used += nzi; 2013 current_space_lvl->local_remaining -= nzi; 2014 } 2015 2016 PetscCall(ISRestoreIndices(isrow, &r)); 2017 PetscCall(ISRestoreIndices(isicol, &ic)); 2018 2019 /* destroy list of free space and other temporary arrays */ 2020 PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscInt),(void **)&bj)); 2021 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */ 2022 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 2023 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2024 PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 2025 2026 #if defined(PETSC_USE_INFO) 2027 { 2028 PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 2029 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 2030 PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 2031 PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 2032 PetscCall(PetscInfo(A, "for best performance.\n")); 2033 if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 2034 } 2035 #endif 2036 2037 /* put together the new matrix */ 2038 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 2039 b = (Mat_SeqAIJ *)fact->data; 2040 b->free_ij = PETSC_TRUE; 2041 PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscScalar),(void **)&b->a)); 2042 b->free_a = PETSC_TRUE; 2043 b->j = bj; 2044 b->i = bi; 2045 for (i = 0; i < n; i++) bdiag[i] += bi[i]; 2046 b->diag = bdiag; 2047 b->ilen = NULL; 2048 b->imax = NULL; 2049 b->row = isrow; 2050 b->col = iscol; 2051 PetscCall(PetscObjectReference((PetscObject)isrow)); 2052 PetscCall(PetscObjectReference((PetscObject)iscol)); 2053 b->icol = isicol; 2054 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 2055 /* In b structure: Free imax, ilen, old a, old j. 2056 Allocate bdiag, solve_work, new a, new j */ 2057 b->maxnz = b->nz = bi[n]; 2058 2059 fact->info.factor_mallocs = reallocs; 2060 fact->info.fill_ratio_given = f; 2061 fact->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 2062 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2063 if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2064 PetscFunctionReturn(PETSC_SUCCESS); 2065 } 2066 #endif 2067 2068 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) 2069 { 2070 Mat C = B; 2071 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2072 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 2073 IS ip = b->row, iip = b->icol; 2074 const PetscInt *rip, *riip; 2075 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp; 2076 PetscInt *ai = a->i, *aj = a->j; 2077 PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz; 2078 MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 2079 PetscBool perm_identity; 2080 FactorShiftCtx sctx; 2081 PetscReal rs; 2082 MatScalar d, *v; 2083 2084 PetscFunctionBegin; 2085 /* MatPivotSetUp(): initialize shift context sctx */ 2086 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 2087 2088 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2089 sctx.shift_top = info->zeropivot; 2090 for (i = 0; i < mbs; i++) { 2091 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2092 d = (aa)[a->diag[i]]; 2093 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2094 v = aa + ai[i]; 2095 nz = ai[i + 1] - ai[i]; 2096 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 2097 if (rs > sctx.shift_top) sctx.shift_top = rs; 2098 } 2099 sctx.shift_top *= 1.1; 2100 sctx.nshift_max = 5; 2101 sctx.shift_lo = 0.; 2102 sctx.shift_hi = 1.; 2103 } 2104 2105 PetscCall(ISGetIndices(ip, &rip)); 2106 PetscCall(ISGetIndices(iip, &riip)); 2107 2108 /* allocate working arrays 2109 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2110 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2111 */ 2112 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r)); 2113 2114 do { 2115 sctx.newshift = PETSC_FALSE; 2116 2117 for (i = 0; i < mbs; i++) c2r[i] = mbs; 2118 if (mbs) il[0] = 0; 2119 2120 for (k = 0; k < mbs; k++) { 2121 /* zero rtmp */ 2122 nz = bi[k + 1] - bi[k]; 2123 bjtmp = bj + bi[k]; 2124 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 2125 2126 /* load in initial unfactored row */ 2127 bval = ba + bi[k]; 2128 jmin = ai[rip[k]]; 2129 jmax = ai[rip[k] + 1]; 2130 for (j = jmin; j < jmax; j++) { 2131 col = riip[aj[j]]; 2132 if (col >= k) { /* only take upper triangular entry */ 2133 rtmp[col] = aa[j]; 2134 *bval++ = 0.0; /* for in-place factorization */ 2135 } 2136 } 2137 /* shift the diagonal of the matrix: ZeropivotApply() */ 2138 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2139 2140 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2141 dk = rtmp[k]; 2142 i = c2r[k]; /* first row to be added to k_th row */ 2143 2144 while (i < k) { 2145 nexti = c2r[i]; /* next row to be added to k_th row */ 2146 2147 /* compute multiplier, update diag(k) and U(i,k) */ 2148 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2149 uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */ 2150 dk += uikdi * ba[ili]; /* update diag[k] */ 2151 ba[ili] = uikdi; /* -U(i,k) */ 2152 2153 /* add multiple of row i to k-th row */ 2154 jmin = ili + 1; 2155 jmax = bi[i + 1]; 2156 if (jmin < jmax) { 2157 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 2158 /* update il and c2r for row i */ 2159 il[i] = jmin; 2160 j = bj[jmin]; 2161 c2r[i] = c2r[j]; 2162 c2r[j] = i; 2163 } 2164 i = nexti; 2165 } 2166 2167 /* copy data into U(k,:) */ 2168 rs = 0.0; 2169 jmin = bi[k]; 2170 jmax = bi[k + 1] - 1; 2171 if (jmin < jmax) { 2172 for (j = jmin; j < jmax; j++) { 2173 col = bj[j]; 2174 ba[j] = rtmp[col]; 2175 rs += PetscAbsScalar(ba[j]); 2176 } 2177 /* add the k-th row into il and c2r */ 2178 il[k] = jmin; 2179 i = bj[jmin]; 2180 c2r[k] = c2r[i]; 2181 c2r[i] = k; 2182 } 2183 2184 /* MatPivotCheck() */ 2185 sctx.rs = rs; 2186 sctx.pv = dk; 2187 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 2188 if (sctx.newshift) break; 2189 dk = sctx.pv; 2190 2191 ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */ 2192 } 2193 } while (sctx.newshift); 2194 2195 PetscCall(PetscFree3(rtmp, il, c2r)); 2196 PetscCall(ISRestoreIndices(ip, &rip)); 2197 PetscCall(ISRestoreIndices(iip, &riip)); 2198 2199 PetscCall(ISIdentity(ip, &perm_identity)); 2200 if (perm_identity) { 2201 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2202 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2203 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2204 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2205 } else { 2206 B->ops->solve = MatSolve_SeqSBAIJ_1; 2207 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2208 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2209 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2210 } 2211 2212 C->assembled = PETSC_TRUE; 2213 C->preallocated = PETSC_TRUE; 2214 2215 PetscCall(PetscLogFlops(C->rmap->n)); 2216 2217 /* MatPivotView() */ 2218 if (sctx.nshift) { 2219 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2220 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 2221 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2222 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2223 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 2224 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 2225 } 2226 } 2227 PetscFunctionReturn(PETSC_SUCCESS); 2228 } 2229 2230 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) 2231 { 2232 Mat C = B; 2233 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2234 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 2235 IS ip = b->row, iip = b->icol; 2236 const PetscInt *rip, *riip; 2237 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp; 2238 PetscInt *ai = a->i, *aj = a->j; 2239 PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz; 2240 MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 2241 PetscBool perm_identity; 2242 FactorShiftCtx sctx; 2243 PetscReal rs; 2244 MatScalar d, *v; 2245 2246 PetscFunctionBegin; 2247 /* MatPivotSetUp(): initialize shift context sctx */ 2248 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 2249 2250 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2251 sctx.shift_top = info->zeropivot; 2252 for (i = 0; i < mbs; i++) { 2253 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2254 d = (aa)[a->diag[i]]; 2255 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2256 v = aa + ai[i]; 2257 nz = ai[i + 1] - ai[i]; 2258 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 2259 if (rs > sctx.shift_top) sctx.shift_top = rs; 2260 } 2261 sctx.shift_top *= 1.1; 2262 sctx.nshift_max = 5; 2263 sctx.shift_lo = 0.; 2264 sctx.shift_hi = 1.; 2265 } 2266 2267 PetscCall(ISGetIndices(ip, &rip)); 2268 PetscCall(ISGetIndices(iip, &riip)); 2269 2270 /* initialization */ 2271 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl)); 2272 2273 do { 2274 sctx.newshift = PETSC_FALSE; 2275 2276 for (i = 0; i < mbs; i++) jl[i] = mbs; 2277 il[0] = 0; 2278 2279 for (k = 0; k < mbs; k++) { 2280 /* zero rtmp */ 2281 nz = bi[k + 1] - bi[k]; 2282 bjtmp = bj + bi[k]; 2283 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 2284 2285 bval = ba + bi[k]; 2286 /* initialize k-th row by the perm[k]-th row of A */ 2287 jmin = ai[rip[k]]; 2288 jmax = ai[rip[k] + 1]; 2289 for (j = jmin; j < jmax; j++) { 2290 col = riip[aj[j]]; 2291 if (col >= k) { /* only take upper triangular entry */ 2292 rtmp[col] = aa[j]; 2293 *bval++ = 0.0; /* for in-place factorization */ 2294 } 2295 } 2296 /* shift the diagonal of the matrix */ 2297 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2298 2299 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2300 dk = rtmp[k]; 2301 i = jl[k]; /* first row to be added to k_th row */ 2302 2303 while (i < k) { 2304 nexti = jl[i]; /* next row to be added to k_th row */ 2305 2306 /* compute multiplier, update diag(k) and U(i,k) */ 2307 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2308 uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */ 2309 dk += uikdi * ba[ili]; 2310 ba[ili] = uikdi; /* -U(i,k) */ 2311 2312 /* add multiple of row i to k-th row */ 2313 jmin = ili + 1; 2314 jmax = bi[i + 1]; 2315 if (jmin < jmax) { 2316 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 2317 /* update il and jl for row i */ 2318 il[i] = jmin; 2319 j = bj[jmin]; 2320 jl[i] = jl[j]; 2321 jl[j] = i; 2322 } 2323 i = nexti; 2324 } 2325 2326 /* shift the diagonals when zero pivot is detected */ 2327 /* compute rs=sum of abs(off-diagonal) */ 2328 rs = 0.0; 2329 jmin = bi[k] + 1; 2330 nz = bi[k + 1] - jmin; 2331 bcol = bj + jmin; 2332 for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]); 2333 2334 sctx.rs = rs; 2335 sctx.pv = dk; 2336 PetscCall(MatPivotCheck(B, A, info, &sctx, k)); 2337 if (sctx.newshift) break; 2338 dk = sctx.pv; 2339 2340 /* copy data into U(k,:) */ 2341 ba[bi[k]] = 1.0 / dk; /* U(k,k) */ 2342 jmin = bi[k] + 1; 2343 jmax = bi[k + 1]; 2344 if (jmin < jmax) { 2345 for (j = jmin; j < jmax; j++) { 2346 col = bj[j]; 2347 ba[j] = rtmp[col]; 2348 } 2349 /* add the k-th row into il and jl */ 2350 il[k] = jmin; 2351 i = bj[jmin]; 2352 jl[k] = jl[i]; 2353 jl[i] = k; 2354 } 2355 } 2356 } while (sctx.newshift); 2357 2358 PetscCall(PetscFree3(rtmp, il, jl)); 2359 PetscCall(ISRestoreIndices(ip, &rip)); 2360 PetscCall(ISRestoreIndices(iip, &riip)); 2361 2362 PetscCall(ISIdentity(ip, &perm_identity)); 2363 if (perm_identity) { 2364 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2365 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2366 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2367 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2368 } else { 2369 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2370 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2371 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2372 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2373 } 2374 2375 C->assembled = PETSC_TRUE; 2376 C->preallocated = PETSC_TRUE; 2377 2378 PetscCall(PetscLogFlops(C->rmap->n)); 2379 if (sctx.nshift) { 2380 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2381 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2382 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2383 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2384 } 2385 } 2386 PetscFunctionReturn(PETSC_SUCCESS); 2387 } 2388 2389 /* 2390 icc() under revised new data structure. 2391 Factored arrays bj and ba are stored as 2392 U(0,:),...,U(i,:),U(n-1,:) 2393 2394 ui=fact->i is an array of size n+1, in which 2395 ui+ 2396 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2397 ui[n]: points to U(n-1,n-1)+1 2398 2399 udiag=fact->diag is an array of size n,in which 2400 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2401 2402 U(i,:) contains udiag[i] as its last entry, i.e., 2403 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2404 */ 2405 2406 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2407 { 2408 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2409 Mat_SeqSBAIJ *b; 2410 PetscBool perm_identity, missing; 2411 PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 2412 const PetscInt *rip, *riip; 2413 PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 2414 PetscInt nlnk, *lnk, *lnk_lvl = NULL, d; 2415 PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 2416 PetscReal fill = info->fill, levels = info->levels; 2417 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2418 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 2419 PetscBT lnkbt; 2420 IS iperm; 2421 2422 PetscFunctionBegin; 2423 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2424 PetscCall(MatMissingDiagonal(A, &missing, &d)); 2425 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 2426 PetscCall(ISIdentity(perm, &perm_identity)); 2427 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2428 2429 PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui)); 2430 PetscCall(PetscMalloc1(am + 1, &udiag)); 2431 ui[0] = 0; 2432 2433 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2434 if (!levels && perm_identity) { 2435 for (i = 0; i < am; i++) { 2436 ncols = ai[i + 1] - a->diag[i]; 2437 ui[i + 1] = ui[i] + ncols; 2438 udiag[i] = ui[i + 1] - 1; /* points to the last entry of U(i,:) */ 2439 } 2440 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2441 cols = uj; 2442 for (i = 0; i < am; i++) { 2443 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2444 ncols = ai[i + 1] - a->diag[i] - 1; 2445 for (j = 0; j < ncols; j++) *cols++ = aj[j]; 2446 *cols++ = i; /* diagonal is located as the last entry of U(i,:) */ 2447 } 2448 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2449 PetscCall(ISGetIndices(iperm, &riip)); 2450 PetscCall(ISGetIndices(perm, &rip)); 2451 2452 /* initialization */ 2453 PetscCall(PetscMalloc1(am + 1, &ajtmp)); 2454 2455 /* jl: linked list for storing indices of the pivot rows 2456 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2457 PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 2458 for (i = 0; i < am; i++) { 2459 jl[i] = am; 2460 il[i] = 0; 2461 } 2462 2463 /* create and initialize a linked list for storing column indices of the active row k */ 2464 nlnk = am + 1; 2465 PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 2466 2467 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2468 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 2469 current_space = free_space; 2470 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl)); 2471 current_space_lvl = free_space_lvl; 2472 2473 for (k = 0; k < am; k++) { /* for each active row k */ 2474 /* initialize lnk by the column indices of row rip[k] of A */ 2475 nzk = 0; 2476 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2477 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2478 ncols_upper = 0; 2479 for (j = 0; j < ncols; j++) { 2480 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2481 if (riip[i] >= k) { /* only take upper triangular entry */ 2482 ajtmp[ncols_upper] = i; 2483 ncols_upper++; 2484 } 2485 } 2486 PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 2487 nzk += nlnk; 2488 2489 /* update lnk by computing fill-in for each pivot row to be merged in */ 2490 prow = jl[k]; /* 1st pivot row */ 2491 2492 while (prow < k) { 2493 nextprow = jl[prow]; 2494 2495 /* merge prow into k-th row */ 2496 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2497 jmax = ui[prow + 1]; 2498 ncols = jmax - jmin; 2499 i = jmin - ui[prow]; 2500 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2501 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2502 j = *(uj - 1); 2503 PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 2504 nzk += nlnk; 2505 2506 /* update il and jl for prow */ 2507 if (jmin < jmax) { 2508 il[prow] = jmin; 2509 j = *cols; 2510 jl[prow] = jl[j]; 2511 jl[j] = prow; 2512 } 2513 prow = nextprow; 2514 } 2515 2516 /* if free space is not available, make more free space */ 2517 if (current_space->local_remaining < nzk) { 2518 i = am - k + 1; /* num of unfactored rows */ 2519 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2520 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2521 PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 2522 reallocs++; 2523 } 2524 2525 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2526 PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 2527 PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 2528 2529 /* add the k-th row into il and jl */ 2530 if (nzk > 1) { 2531 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2532 jl[k] = jl[i]; 2533 jl[i] = k; 2534 il[k] = ui[k] + 1; 2535 } 2536 uj_ptr[k] = current_space->array; 2537 uj_lvl_ptr[k] = current_space_lvl->array; 2538 2539 current_space->array += nzk; 2540 current_space->local_used += nzk; 2541 current_space->local_remaining -= nzk; 2542 2543 current_space_lvl->array += nzk; 2544 current_space_lvl->local_used += nzk; 2545 current_space_lvl->local_remaining -= nzk; 2546 2547 ui[k + 1] = ui[k] + nzk; 2548 } 2549 2550 PetscCall(ISRestoreIndices(perm, &rip)); 2551 PetscCall(ISRestoreIndices(iperm, &riip)); 2552 PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 2553 PetscCall(PetscFree(ajtmp)); 2554 2555 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2556 PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj)); 2557 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 2558 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 2559 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2560 2561 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2562 2563 /* put together the new matrix in MATSEQSBAIJ format */ 2564 b = (Mat_SeqSBAIJ *)fact->data; 2565 b->free_ij = PETSC_TRUE; 2566 PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscScalar), (void **)&b->a)); 2567 b->free_a = PETSC_TRUE; 2568 b->j = uj; 2569 b->i = ui; 2570 b->diag = udiag; 2571 b->free_diag = PETSC_TRUE; 2572 b->ilen = NULL; 2573 b->imax = NULL; 2574 b->row = perm; 2575 b->col = perm; 2576 PetscCall(PetscObjectReference((PetscObject)perm)); 2577 PetscCall(PetscObjectReference((PetscObject)perm)); 2578 b->icol = iperm; 2579 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2580 2581 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 2582 2583 b->maxnz = b->nz = ui[am]; 2584 2585 fact->info.factor_mallocs = reallocs; 2586 fact->info.fill_ratio_given = fill; 2587 if (ai[am] != 0) { 2588 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2589 fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 2590 } else { 2591 fact->info.fill_ratio_needed = 0.0; 2592 } 2593 #if defined(PETSC_USE_INFO) 2594 if (ai[am] != 0) { 2595 PetscReal af = fact->info.fill_ratio_needed; 2596 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2597 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2598 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2599 } else { 2600 PetscCall(PetscInfo(A, "Empty matrix\n")); 2601 } 2602 #endif 2603 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2604 PetscFunctionReturn(PETSC_SUCCESS); 2605 } 2606 2607 #if 0 2608 // unused 2609 static PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2610 { 2611 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2612 Mat_SeqSBAIJ *b; 2613 PetscBool perm_identity, missing; 2614 PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 2615 const PetscInt *rip, *riip; 2616 PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 2617 PetscInt nlnk, *lnk, *lnk_lvl = NULL, d; 2618 PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 2619 PetscReal fill = info->fill, levels = info->levels; 2620 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2621 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 2622 PetscBT lnkbt; 2623 IS iperm; 2624 2625 PetscFunctionBegin; 2626 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2627 PetscCall(MatMissingDiagonal(A, &missing, &d)); 2628 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 2629 PetscCall(ISIdentity(perm, &perm_identity)); 2630 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2631 2632 PetscCall(PetscShmgetAllocateArray(am + 1,sizeof(PetscInt),(void **)&ui)); 2633 PetscCall(PetscMalloc1(am + 1, &udiag)); 2634 ui[0] = 0; 2635 2636 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2637 if (!levels && perm_identity) { 2638 for (i = 0; i < am; i++) { 2639 ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i]; 2640 udiag[i] = ui[i]; 2641 } 2642 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2643 cols = uj; 2644 for (i = 0; i < am; i++) { 2645 aj = a->j + a->diag[i]; 2646 ncols = ui[i + 1] - ui[i]; 2647 for (j = 0; j < ncols; j++) *cols++ = *aj++; 2648 } 2649 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2650 PetscCall(ISGetIndices(iperm, &riip)); 2651 PetscCall(ISGetIndices(perm, &rip)); 2652 2653 /* initialization */ 2654 PetscCall(PetscMalloc1(am + 1, &ajtmp)); 2655 2656 /* jl: linked list for storing indices of the pivot rows 2657 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2658 PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 2659 for (i = 0; i < am; i++) { 2660 jl[i] = am; 2661 il[i] = 0; 2662 } 2663 2664 /* create and initialize a linked list for storing column indices of the active row k */ 2665 nlnk = am + 1; 2666 PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 2667 2668 /* initial FreeSpace size is fill*(ai[am]+1) */ 2669 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space)); 2670 current_space = free_space; 2671 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl)); 2672 current_space_lvl = free_space_lvl; 2673 2674 for (k = 0; k < am; k++) { /* for each active row k */ 2675 /* initialize lnk by the column indices of row rip[k] of A */ 2676 nzk = 0; 2677 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2678 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2679 ncols_upper = 0; 2680 for (j = 0; j < ncols; j++) { 2681 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2682 if (riip[i] >= k) { /* only take upper triangular entry */ 2683 ajtmp[ncols_upper] = i; 2684 ncols_upper++; 2685 } 2686 } 2687 PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 2688 nzk += nlnk; 2689 2690 /* update lnk by computing fill-in for each pivot row to be merged in */ 2691 prow = jl[k]; /* 1st pivot row */ 2692 2693 while (prow < k) { 2694 nextprow = jl[prow]; 2695 2696 /* merge prow into k-th row */ 2697 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2698 jmax = ui[prow + 1]; 2699 ncols = jmax - jmin; 2700 i = jmin - ui[prow]; 2701 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2702 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2703 j = *(uj - 1); 2704 PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 2705 nzk += nlnk; 2706 2707 /* update il and jl for prow */ 2708 if (jmin < jmax) { 2709 il[prow] = jmin; 2710 j = *cols; 2711 jl[prow] = jl[j]; 2712 jl[j] = prow; 2713 } 2714 prow = nextprow; 2715 } 2716 2717 /* if free space is not available, make more free space */ 2718 if (current_space->local_remaining < nzk) { 2719 i = am - k + 1; /* num of unfactored rows */ 2720 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2721 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2722 PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 2723 reallocs++; 2724 } 2725 2726 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2727 PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 2728 PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 2729 2730 /* add the k-th row into il and jl */ 2731 if (nzk > 1) { 2732 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2733 jl[k] = jl[i]; 2734 jl[i] = k; 2735 il[k] = ui[k] + 1; 2736 } 2737 uj_ptr[k] = current_space->array; 2738 uj_lvl_ptr[k] = current_space_lvl->array; 2739 2740 current_space->array += nzk; 2741 current_space->local_used += nzk; 2742 current_space->local_remaining -= nzk; 2743 2744 current_space_lvl->array += nzk; 2745 current_space_lvl->local_used += nzk; 2746 current_space_lvl->local_remaining -= nzk; 2747 2748 ui[k + 1] = ui[k] + nzk; 2749 } 2750 2751 #if defined(PETSC_USE_INFO) 2752 if (ai[am] != 0) { 2753 PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]); 2754 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2755 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2756 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2757 } else { 2758 PetscCall(PetscInfo(A, "Empty matrix\n")); 2759 } 2760 #endif 2761 2762 PetscCall(ISRestoreIndices(perm, &rip)); 2763 PetscCall(ISRestoreIndices(iperm, &riip)); 2764 PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 2765 PetscCall(PetscFree(ajtmp)); 2766 2767 /* destroy list of free space and other temporary array(s) */ 2768 PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscInt),(void **)&uj)); 2769 PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 2770 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 2771 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2772 2773 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2774 2775 /* put together the new matrix in MATSEQSBAIJ format */ 2776 2777 b = (Mat_SeqSBAIJ *)fact->data; 2778 b->free_ij = PETSC_TRUE; 2779 PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscScalar),(void **)&b->a)); 2780 b->free_a = PETSC_TRUE; 2781 b->j = uj; 2782 b->i = ui; 2783 b->diag = udiag; 2784 b->free_diag = PETSC_TRUE; 2785 b->ilen = NULL; 2786 b->imax = NULL; 2787 b->row = perm; 2788 b->col = perm; 2789 2790 PetscCall(PetscObjectReference((PetscObject)perm)); 2791 PetscCall(PetscObjectReference((PetscObject)perm)); 2792 2793 b->icol = iperm; 2794 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2795 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 2796 b->maxnz = b->nz = ui[am]; 2797 2798 fact->info.factor_mallocs = reallocs; 2799 fact->info.fill_ratio_given = fill; 2800 if (ai[am] != 0) { 2801 fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]); 2802 } else { 2803 fact->info.fill_ratio_needed = 0.0; 2804 } 2805 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2806 PetscFunctionReturn(PETSC_SUCCESS); 2807 } 2808 #endif 2809 2810 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2811 { 2812 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2813 Mat_SeqSBAIJ *b; 2814 PetscBool perm_identity, missing; 2815 PetscReal fill = info->fill; 2816 const PetscInt *rip, *riip; 2817 PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 2818 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 2819 PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag; 2820 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2821 PetscBT lnkbt; 2822 IS iperm; 2823 2824 PetscFunctionBegin; 2825 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2826 PetscCall(MatMissingDiagonal(A, &missing, &i)); 2827 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 2828 2829 /* check whether perm is the identity mapping */ 2830 PetscCall(ISIdentity(perm, &perm_identity)); 2831 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2832 PetscCall(ISGetIndices(iperm, &riip)); 2833 PetscCall(ISGetIndices(perm, &rip)); 2834 2835 /* initialization */ 2836 PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui)); 2837 PetscCall(PetscMalloc1(am + 1, &udiag)); 2838 ui[0] = 0; 2839 2840 /* jl: linked list for storing indices of the pivot rows 2841 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2842 PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 2843 for (i = 0; i < am; i++) { 2844 jl[i] = am; 2845 il[i] = 0; 2846 } 2847 2848 /* create and initialize a linked list for storing column indices of the active row k */ 2849 nlnk = am + 1; 2850 PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 2851 2852 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2853 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 2854 current_space = free_space; 2855 2856 for (k = 0; k < am; k++) { /* for each active row k */ 2857 /* initialize lnk by the column indices of row rip[k] of A */ 2858 nzk = 0; 2859 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2860 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2861 ncols_upper = 0; 2862 for (j = 0; j < ncols; j++) { 2863 i = riip[*(aj + ai[rip[k]] + j)]; 2864 if (i >= k) { /* only take upper triangular entry */ 2865 cols[ncols_upper] = i; 2866 ncols_upper++; 2867 } 2868 } 2869 PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 2870 nzk += nlnk; 2871 2872 /* update lnk by computing fill-in for each pivot row to be merged in */ 2873 prow = jl[k]; /* 1st pivot row */ 2874 2875 while (prow < k) { 2876 nextprow = jl[prow]; 2877 /* merge prow into k-th row */ 2878 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2879 jmax = ui[prow + 1]; 2880 ncols = jmax - jmin; 2881 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2882 PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 2883 nzk += nlnk; 2884 2885 /* update il and jl for prow */ 2886 if (jmin < jmax) { 2887 il[prow] = jmin; 2888 j = *uj_ptr; 2889 jl[prow] = jl[j]; 2890 jl[j] = prow; 2891 } 2892 prow = nextprow; 2893 } 2894 2895 /* if free space is not available, make more free space */ 2896 if (current_space->local_remaining < nzk) { 2897 i = am - k + 1; /* num of unfactored rows */ 2898 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2899 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2900 reallocs++; 2901 } 2902 2903 /* copy data into free space, then initialize lnk */ 2904 PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 2905 2906 /* add the k-th row into il and jl */ 2907 if (nzk > 1) { 2908 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2909 jl[k] = jl[i]; 2910 jl[i] = k; 2911 il[k] = ui[k] + 1; 2912 } 2913 ui_ptr[k] = current_space->array; 2914 2915 current_space->array += nzk; 2916 current_space->local_used += nzk; 2917 current_space->local_remaining -= nzk; 2918 2919 ui[k + 1] = ui[k] + nzk; 2920 } 2921 2922 PetscCall(ISRestoreIndices(perm, &rip)); 2923 PetscCall(ISRestoreIndices(iperm, &riip)); 2924 PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 2925 2926 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2927 PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj)); 2928 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 2929 PetscCall(PetscLLDestroy(lnk, lnkbt)); 2930 2931 /* put together the new matrix in MATSEQSBAIJ format */ 2932 b = (Mat_SeqSBAIJ *)fact->data; 2933 b->free_ij = PETSC_TRUE; 2934 PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscScalar), (void **)&b->a)); 2935 b->free_a = PETSC_TRUE; 2936 b->j = uj; 2937 b->i = ui; 2938 b->diag = udiag; 2939 b->free_diag = PETSC_TRUE; 2940 b->ilen = NULL; 2941 b->imax = NULL; 2942 b->row = perm; 2943 b->col = perm; 2944 2945 PetscCall(PetscObjectReference((PetscObject)perm)); 2946 PetscCall(PetscObjectReference((PetscObject)perm)); 2947 2948 b->icol = iperm; 2949 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2950 2951 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 2952 2953 b->maxnz = b->nz = ui[am]; 2954 2955 fact->info.factor_mallocs = reallocs; 2956 fact->info.fill_ratio_given = fill; 2957 if (ai[am] != 0) { 2958 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2959 fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 2960 } else { 2961 fact->info.fill_ratio_needed = 0.0; 2962 } 2963 #if defined(PETSC_USE_INFO) 2964 if (ai[am] != 0) { 2965 PetscReal af = fact->info.fill_ratio_needed; 2966 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2967 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2968 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2969 } else { 2970 PetscCall(PetscInfo(A, "Empty matrix\n")); 2971 } 2972 #endif 2973 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2974 PetscFunctionReturn(PETSC_SUCCESS); 2975 } 2976 2977 #if 0 2978 // unused 2979 static PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2980 { 2981 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2982 Mat_SeqSBAIJ *b; 2983 PetscBool perm_identity, missing; 2984 PetscReal fill = info->fill; 2985 const PetscInt *rip, *riip; 2986 PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 2987 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 2988 PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr; 2989 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2990 PetscBT lnkbt; 2991 IS iperm; 2992 2993 PetscFunctionBegin; 2994 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2995 PetscCall(MatMissingDiagonal(A, &missing, &i)); 2996 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 2997 2998 /* check whether perm is the identity mapping */ 2999 PetscCall(ISIdentity(perm, &perm_identity)); 3000 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 3001 PetscCall(ISGetIndices(iperm, &riip)); 3002 PetscCall(ISGetIndices(perm, &rip)); 3003 3004 /* initialization */ 3005 PetscCall(PetscShmgetAllocateArray(am + 1,sizeof(PetscInt),(void **)&ui)); 3006 ui[0] = 0; 3007 3008 /* jl: linked list for storing indices of the pivot rows 3009 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 3010 PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 3011 for (i = 0; i < am; i++) { 3012 jl[i] = am; 3013 il[i] = 0; 3014 } 3015 3016 /* create and initialize a linked list for storing column indices of the active row k */ 3017 nlnk = am + 1; 3018 PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 3019 3020 /* initial FreeSpace size is fill*(ai[am]+1) */ 3021 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space)); 3022 current_space = free_space; 3023 3024 for (k = 0; k < am; k++) { /* for each active row k */ 3025 /* initialize lnk by the column indices of row rip[k] of A */ 3026 nzk = 0; 3027 ncols = ai[rip[k] + 1] - ai[rip[k]]; 3028 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 3029 ncols_upper = 0; 3030 for (j = 0; j < ncols; j++) { 3031 i = riip[*(aj + ai[rip[k]] + j)]; 3032 if (i >= k) { /* only take upper triangular entry */ 3033 cols[ncols_upper] = i; 3034 ncols_upper++; 3035 } 3036 } 3037 PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 3038 nzk += nlnk; 3039 3040 /* update lnk by computing fill-in for each pivot row to be merged in */ 3041 prow = jl[k]; /* 1st pivot row */ 3042 3043 while (prow < k) { 3044 nextprow = jl[prow]; 3045 /* merge prow into k-th row */ 3046 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3047 jmax = ui[prow + 1]; 3048 ncols = jmax - jmin; 3049 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3050 PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 3051 nzk += nlnk; 3052 3053 /* update il and jl for prow */ 3054 if (jmin < jmax) { 3055 il[prow] = jmin; 3056 j = *uj_ptr; 3057 jl[prow] = jl[j]; 3058 jl[j] = prow; 3059 } 3060 prow = nextprow; 3061 } 3062 3063 /* if free space is not available, make more free space */ 3064 if (current_space->local_remaining < nzk) { 3065 i = am - k + 1; /* num of unfactored rows */ 3066 i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3067 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 3068 reallocs++; 3069 } 3070 3071 /* copy data into free space, then initialize lnk */ 3072 PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 3073 3074 /* add the k-th row into il and jl */ 3075 if (nzk - 1 > 0) { 3076 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3077 jl[k] = jl[i]; 3078 jl[i] = k; 3079 il[k] = ui[k] + 1; 3080 } 3081 ui_ptr[k] = current_space->array; 3082 3083 current_space->array += nzk; 3084 current_space->local_used += nzk; 3085 current_space->local_remaining -= nzk; 3086 3087 ui[k + 1] = ui[k] + nzk; 3088 } 3089 3090 #if defined(PETSC_USE_INFO) 3091 if (ai[am] != 0) { 3092 PetscReal af = (PetscReal)ui[am] / (PetscReal)ai[am]; 3093 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 3094 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 3095 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 3096 } else { 3097 PetscCall(PetscInfo(A, "Empty matrix\n")); 3098 } 3099 #endif 3100 3101 PetscCall(ISRestoreIndices(perm, &rip)); 3102 PetscCall(ISRestoreIndices(iperm, &riip)); 3103 PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 3104 3105 /* destroy list of free space and other temporary array(s) */ 3106 PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscInt),(void **)&uj)); 3107 PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 3108 PetscCall(PetscLLDestroy(lnk, lnkbt)); 3109 3110 /* put together the new matrix in MATSEQSBAIJ format */ 3111 b = (Mat_SeqSBAIJ *)fact->data; 3112 b->free_ij = PETSC_TRUE; 3113 PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscScalar),(void **)&b->a)); 3114 b->free_a = PETSC_TRUE; 3115 b->j = uj; 3116 b->i = ui; 3117 b->diag = NULL; 3118 b->ilen = NULL; 3119 b->imax = NULL; 3120 b->row = perm; 3121 b->col = perm; 3122 3123 PetscCall(PetscObjectReference((PetscObject)perm)); 3124 PetscCall(PetscObjectReference((PetscObject)perm)); 3125 3126 b->icol = iperm; 3127 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3128 3129 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 3130 b->maxnz = b->nz = ui[am]; 3131 3132 fact->info.factor_mallocs = reallocs; 3133 fact->info.fill_ratio_given = fill; 3134 if (ai[am] != 0) { 3135 fact->info.fill_ratio_needed = (PetscReal)ui[am] / (PetscReal)ai[am]; 3136 } else { 3137 fact->info.fill_ratio_needed = 0.0; 3138 } 3139 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3140 PetscFunctionReturn(PETSC_SUCCESS); 3141 } 3142 #endif 3143 3144 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx) 3145 { 3146 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3147 PetscInt n = A->rmap->n; 3148 const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi; 3149 PetscScalar *x, sum; 3150 const PetscScalar *b; 3151 const MatScalar *aa = a->a, *v; 3152 PetscInt i, nz; 3153 3154 PetscFunctionBegin; 3155 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 3156 3157 PetscCall(VecGetArrayRead(bb, &b)); 3158 PetscCall(VecGetArrayWrite(xx, &x)); 3159 3160 /* forward solve the lower triangular */ 3161 x[0] = b[0]; 3162 v = aa; 3163 vi = aj; 3164 for (i = 1; i < n; i++) { 3165 nz = ai[i + 1] - ai[i]; 3166 sum = b[i]; 3167 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 3168 v += nz; 3169 vi += nz; 3170 x[i] = sum; 3171 } 3172 3173 /* backward solve the upper triangular */ 3174 for (i = n - 1; i >= 0; i--) { 3175 v = aa + adiag[i + 1] + 1; 3176 vi = aj + adiag[i + 1] + 1; 3177 nz = adiag[i] - adiag[i + 1] - 1; 3178 sum = x[i]; 3179 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 3180 x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3181 } 3182 3183 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 3184 PetscCall(VecRestoreArrayRead(bb, &b)); 3185 PetscCall(VecRestoreArrayWrite(xx, &x)); 3186 PetscFunctionReturn(PETSC_SUCCESS); 3187 } 3188 3189 PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx) 3190 { 3191 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3192 IS iscol = a->col, isrow = a->row; 3193 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz; 3194 const PetscInt *rout, *cout, *r, *c; 3195 PetscScalar *x, *tmp, sum; 3196 const PetscScalar *b; 3197 const MatScalar *aa = a->a, *v; 3198 3199 PetscFunctionBegin; 3200 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 3201 3202 PetscCall(VecGetArrayRead(bb, &b)); 3203 PetscCall(VecGetArrayWrite(xx, &x)); 3204 tmp = a->solve_work; 3205 3206 PetscCall(ISGetIndices(isrow, &rout)); 3207 r = rout; 3208 PetscCall(ISGetIndices(iscol, &cout)); 3209 c = cout; 3210 3211 /* forward solve the lower triangular */ 3212 tmp[0] = b[r[0]]; 3213 v = aa; 3214 vi = aj; 3215 for (i = 1; i < n; i++) { 3216 nz = ai[i + 1] - ai[i]; 3217 sum = b[r[i]]; 3218 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 3219 tmp[i] = sum; 3220 v += nz; 3221 vi += nz; 3222 } 3223 3224 /* backward solve the upper triangular */ 3225 for (i = n - 1; i >= 0; i--) { 3226 v = aa + adiag[i + 1] + 1; 3227 vi = aj + adiag[i + 1] + 1; 3228 nz = adiag[i] - adiag[i + 1] - 1; 3229 sum = tmp[i]; 3230 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 3231 x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 3232 } 3233 3234 PetscCall(ISRestoreIndices(isrow, &rout)); 3235 PetscCall(ISRestoreIndices(iscol, &cout)); 3236 PetscCall(VecRestoreArrayRead(bb, &b)); 3237 PetscCall(VecRestoreArrayWrite(xx, &x)); 3238 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 3239 PetscFunctionReturn(PETSC_SUCCESS); 3240 } 3241 3242 #if 0 3243 // unused 3244 /* 3245 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3246 */ 3247 static PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact) 3248 { 3249 Mat B = *fact; 3250 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 3251 IS isicol; 3252 const PetscInt *r, *ic; 3253 PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag; 3254 PetscInt *bi, *bj, *bdiag, *bdiag_rev; 3255 PetscInt row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au; 3256 PetscInt nlnk, *lnk; 3257 PetscBT lnkbt; 3258 PetscBool row_identity, icol_identity; 3259 MatScalar *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp; 3260 const PetscInt *ics; 3261 PetscInt j, nz, *pj, *bjtmp, k, ncut, *jtmp; 3262 PetscReal dt = info->dt, shift = info->shiftamount; 3263 PetscInt dtcount = (PetscInt)info->dtcount, nnz_max; 3264 PetscBool missing; 3265 3266 PetscFunctionBegin; 3267 if (dt == (PetscReal)PETSC_DEFAULT) dt = 0.005; 3268 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax); 3269 3270 /* symbolic factorization, can be reused */ 3271 PetscCall(MatMissingDiagonal(A, &missing, &i)); 3272 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 3273 adiag = a->diag; 3274 3275 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 3276 3277 /* bdiag is location of diagonal in factor */ 3278 PetscCall(PetscMalloc1(n + 1, &bdiag)); /* becomes b->diag */ 3279 PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */ 3280 3281 /* allocate row pointers bi */ 3282 PetscCall(PetscShmgetAllocateArray(2 * n + 2,sizeof(PetscInt),(void **)&bi)); 3283 3284 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3285 if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */ 3286 nnz_max = ai[n] + 2 * n * dtcount + 2; 3287 3288 PetscCall(PetscShmgetAllocateArray(nnz_max + 1,sizeof(PetscInt),(void **)&bj)); 3289 PetscCall(PetscShmgetAllocateArray(nnz_max + 1,sizeof(PetscScalar),(void **)&ba)); 3290 3291 /* put together the new matrix */ 3292 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 3293 b = (Mat_SeqAIJ *)B->data; 3294 b->free_a = PETSC_TRUE; 3295 b->free_ij = PETSC_TRUE; 3296 b->a = ba; 3297 b->j = bj; 3298 b->i = bi; 3299 b->diag = bdiag; 3300 b->ilen = NULL; 3301 b->imax = NULL; 3302 b->row = isrow; 3303 b->col = iscol; 3304 PetscCall(PetscObjectReference((PetscObject)isrow)); 3305 PetscCall(PetscObjectReference((PetscObject)iscol)); 3306 b->icol = isicol; 3307 3308 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 3309 b->maxnz = nnz_max; 3310 3311 B->factortype = MAT_FACTOR_ILUDT; 3312 B->info.factor_mallocs = 0; 3313 B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]); 3314 /* end of symbolic factorization */ 3315 3316 PetscCall(ISGetIndices(isrow, &r)); 3317 PetscCall(ISGetIndices(isicol, &ic)); 3318 ics = ic; 3319 3320 /* linked list for storing column indices of the active row */ 3321 nlnk = n + 1; 3322 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 3323 3324 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3325 PetscCall(PetscMalloc2(n, &im, n, &jtmp)); 3326 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3327 PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp)); 3328 PetscCall(PetscArrayzero(rtmp, n)); 3329 3330 bi[0] = 0; 3331 bdiag[0] = nnz_max - 1; /* location of diag[0] in factor B */ 3332 bdiag_rev[n] = bdiag[0]; 3333 bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */ 3334 for (i = 0; i < n; i++) { 3335 /* copy initial fill into linked list */ 3336 nzi = ai[r[i] + 1] - ai[r[i]]; 3337 PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 3338 nzi_al = adiag[r[i]] - ai[r[i]]; 3339 nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1; 3340 ajtmp = aj + ai[r[i]]; 3341 PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 3342 3343 /* load in initial (unfactored row) */ 3344 aatmp = a->a + ai[r[i]]; 3345 for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++; 3346 3347 /* add pivot rows into linked list */ 3348 row = lnk[n]; 3349 while (row < i) { 3350 nzi_bl = bi[row + 1] - bi[row] + 1; 3351 bjtmp = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */ 3352 PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im)); 3353 nzi += nlnk; 3354 row = lnk[row]; 3355 } 3356 3357 /* copy data from lnk into jtmp, then initialize lnk */ 3358 PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt)); 3359 3360 /* numerical factorization */ 3361 bjtmp = jtmp; 3362 row = *bjtmp++; /* 1st pivot row */ 3363 while (row < i) { 3364 pc = rtmp + row; 3365 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3366 multiplier = (*pc) * (*pv); 3367 *pc = multiplier; 3368 if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 3369 pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */ 3370 pv = ba + bdiag[row + 1] + 1; 3371 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3372 for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3373 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 3374 } 3375 row = *bjtmp++; 3376 } 3377 3378 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3379 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3380 nzi_bl = 0; 3381 j = 0; 3382 while (jtmp[j] < i) { /* Note: jtmp is sorted */ 3383 vtmp[j] = rtmp[jtmp[j]]; 3384 rtmp[jtmp[j]] = 0.0; 3385 nzi_bl++; 3386 j++; 3387 } 3388 nzi_bu = nzi - nzi_bl - 1; 3389 while (j < nzi) { 3390 vtmp[j] = rtmp[jtmp[j]]; 3391 rtmp[jtmp[j]] = 0.0; 3392 j++; 3393 } 3394 3395 bjtmp = bj + bi[i]; 3396 batmp = ba + bi[i]; 3397 /* apply level dropping rule to L part */ 3398 ncut = nzi_al + dtcount; 3399 if (ncut < nzi_bl) { 3400 PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp)); 3401 PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp)); 3402 } else { 3403 ncut = nzi_bl; 3404 } 3405 for (j = 0; j < ncut; j++) { 3406 bjtmp[j] = jtmp[j]; 3407 batmp[j] = vtmp[j]; 3408 } 3409 bi[i + 1] = bi[i] + ncut; 3410 nzi = ncut + 1; 3411 3412 /* apply level dropping rule to U part */ 3413 ncut = nzi_au + dtcount; 3414 if (ncut < nzi_bu) { 3415 PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1)); 3416 PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1)); 3417 } else { 3418 ncut = nzi_bu; 3419 } 3420 nzi += ncut; 3421 3422 /* mark bdiagonal */ 3423 bdiag[i + 1] = bdiag[i] - (ncut + 1); 3424 bdiag_rev[n - i - 1] = bdiag[i + 1]; 3425 bi[2 * n - i] = bi[2 * n - i + 1] - (ncut + 1); 3426 bjtmp = bj + bdiag[i]; 3427 batmp = ba + bdiag[i]; 3428 *bjtmp = i; 3429 *batmp = diag_tmp; /* rtmp[i]; */ 3430 if (*batmp == 0.0) *batmp = dt + shift; 3431 *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */ 3432 3433 bjtmp = bj + bdiag[i + 1] + 1; 3434 batmp = ba + bdiag[i + 1] + 1; 3435 for (k = 0; k < ncut; k++) { 3436 bjtmp[k] = jtmp[nzi_bl + 1 + k]; 3437 batmp[k] = vtmp[nzi_bl + 1 + k]; 3438 } 3439 3440 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3441 } /* for (i=0; i<n; i++) */ 3442 PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]); 3443 3444 PetscCall(ISRestoreIndices(isrow, &r)); 3445 PetscCall(ISRestoreIndices(isicol, &ic)); 3446 3447 PetscCall(PetscLLDestroy(lnk, lnkbt)); 3448 PetscCall(PetscFree2(im, jtmp)); 3449 PetscCall(PetscFree2(rtmp, vtmp)); 3450 PetscCall(PetscFree(bdiag_rev)); 3451 3452 PetscCall(PetscLogFlops(B->cmap->n)); 3453 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3454 3455 PetscCall(ISIdentity(isrow, &row_identity)); 3456 PetscCall(ISIdentity(isicol, &icol_identity)); 3457 if (row_identity && icol_identity) { 3458 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3459 } else { 3460 B->ops->solve = MatSolve_SeqAIJ; 3461 } 3462 3463 B->ops->solveadd = NULL; 3464 B->ops->solvetranspose = NULL; 3465 B->ops->solvetransposeadd = NULL; 3466 B->ops->matsolve = NULL; 3467 B->assembled = PETSC_TRUE; 3468 B->preallocated = PETSC_TRUE; 3469 PetscFunctionReturn(PETSC_SUCCESS); 3470 } 3471 3472 /* a wrapper of MatILUDTFactor_SeqAIJ() */ 3473 /* 3474 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3475 */ 3476 3477 static PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) 3478 { 3479 PetscFunctionBegin; 3480 PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact)); 3481 PetscFunctionReturn(PETSC_SUCCESS); 3482 } 3483 3484 /* 3485 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3486 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3487 */ 3488 /* 3489 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3490 */ 3491 3492 static PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info) 3493 { 3494 Mat C = fact; 3495 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 3496 IS isrow = b->row, isicol = b->icol; 3497 const PetscInt *r, *ic, *ics; 3498 PetscInt i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 3499 PetscInt *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj; 3500 MatScalar *rtmp, *pc, multiplier, *v, *pv, *aa = a->a; 3501 PetscReal dt = info->dt, shift = info->shiftamount; 3502 PetscBool row_identity, col_identity; 3503 3504 PetscFunctionBegin; 3505 PetscCall(ISGetIndices(isrow, &r)); 3506 PetscCall(ISGetIndices(isicol, &ic)); 3507 PetscCall(PetscMalloc1(n + 1, &rtmp)); 3508 ics = ic; 3509 3510 for (i = 0; i < n; i++) { 3511 /* initialize rtmp array */ 3512 nzl = bi[i + 1] - bi[i]; /* num of nonzeros in L(i,:) */ 3513 bjtmp = bj + bi[i]; 3514 for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0; 3515 rtmp[i] = 0.0; 3516 nzu = bdiag[i] - bdiag[i + 1]; /* num of nonzeros in U(i,:) */ 3517 bjtmp = bj + bdiag[i + 1] + 1; 3518 for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0; 3519 3520 /* load in initial unfactored row of A */ 3521 nz = ai[r[i] + 1] - ai[r[i]]; 3522 ajtmp = aj + ai[r[i]]; 3523 v = aa + ai[r[i]]; 3524 for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j]; 3525 3526 /* numerical factorization */ 3527 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3528 nzl = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */ 3529 k = 0; 3530 while (k < nzl) { 3531 row = *bjtmp++; 3532 pc = rtmp + row; 3533 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3534 multiplier = (*pc) * (*pv); 3535 *pc = multiplier; 3536 if (PetscAbsScalar(multiplier) > dt) { 3537 pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */ 3538 pv = b->a + bdiag[row + 1] + 1; 3539 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3540 for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3541 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 3542 } 3543 k++; 3544 } 3545 3546 /* finished row so stick it into b->a */ 3547 /* L-part */ 3548 pv = b->a + bi[i]; 3549 pj = bj + bi[i]; 3550 nzl = bi[i + 1] - bi[i]; 3551 for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]]; 3552 3553 /* diagonal: invert diagonal entries for simpler triangular solves */ 3554 if (rtmp[i] == 0.0) rtmp[i] = dt + shift; 3555 b->a[bdiag[i]] = 1.0 / rtmp[i]; 3556 3557 /* U-part */ 3558 pv = b->a + bdiag[i + 1] + 1; 3559 pj = bj + bdiag[i + 1] + 1; 3560 nzu = bdiag[i] - bdiag[i + 1] - 1; 3561 for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]]; 3562 } 3563 3564 PetscCall(PetscFree(rtmp)); 3565 PetscCall(ISRestoreIndices(isicol, &ic)); 3566 PetscCall(ISRestoreIndices(isrow, &r)); 3567 3568 PetscCall(ISIdentity(isrow, &row_identity)); 3569 PetscCall(ISIdentity(isicol, &col_identity)); 3570 if (row_identity && col_identity) { 3571 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3572 } else { 3573 C->ops->solve = MatSolve_SeqAIJ; 3574 } 3575 C->ops->solveadd = NULL; 3576 C->ops->solvetranspose = NULL; 3577 C->ops->solvetransposeadd = NULL; 3578 C->ops->matsolve = NULL; 3579 C->assembled = PETSC_TRUE; 3580 C->preallocated = PETSC_TRUE; 3581 3582 PetscCall(PetscLogFlops(C->cmap->n)); 3583 PetscFunctionReturn(PETSC_SUCCESS); 3584 } 3585 #endif 3586