1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/mat/impls/sbaij/seq/sbaij.h> 3 #include <petscbt.h> 4 #include <../src/mat/utils/freespace.h> 5 6 /* 7 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 8 9 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 10 */ 11 #if 0 12 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol) 13 { 14 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->data; 15 PetscInt i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order; 16 const PetscInt *ai = a->i, *aj = a->j; 17 const PetscScalar *aa = a->a; 18 PetscBool *done; 19 PetscReal best, past = 0, future; 20 21 PetscFunctionBegin; 22 /* pick initial row */ 23 best = -1; 24 for (i = 0; i < n; i++) { 25 future = 0.0; 26 for (j = ai[i]; j < ai[i + 1]; j++) { 27 if (aj[j] != i) future += PetscAbsScalar(aa[j]); 28 else past = PetscAbsScalar(aa[j]); 29 } 30 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 31 if (past / future > best) { 32 best = past / future; 33 current = i; 34 } 35 } 36 37 PetscCall(PetscMalloc1(n, &done)); 38 PetscCall(PetscArrayzero(done, n)); 39 PetscCall(PetscMalloc1(n, &order)); 40 order[0] = current; 41 for (i = 0; i < n - 1; i++) { 42 done[current] = PETSC_TRUE; 43 best = -1; 44 /* loop over all neighbors of current pivot */ 45 for (j = ai[current]; j < ai[current + 1]; j++) { 46 jj = aj[j]; 47 if (done[jj]) continue; 48 /* loop over columns of potential next row computing weights for below and above diagonal */ 49 past = future = 0.0; 50 for (k = ai[jj]; k < ai[jj + 1]; k++) { 51 kk = aj[k]; 52 if (done[kk]) past += PetscAbsScalar(aa[k]); 53 else if (kk != jj) future += PetscAbsScalar(aa[k]); 54 } 55 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 56 if (past / future > best) { 57 best = past / future; 58 newcurrent = jj; 59 } 60 } 61 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 62 best = -1; 63 for (k = 0; k < n; k++) { 64 if (done[k]) continue; 65 future = 0.0; 66 past = 0.0; 67 for (j = ai[k]; j < ai[k + 1]; j++) { 68 kk = aj[j]; 69 if (done[kk]) past += PetscAbsScalar(aa[j]); 70 else if (kk != k) future += PetscAbsScalar(aa[j]); 71 } 72 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 73 if (past / future > best) { 74 best = past / future; 75 newcurrent = k; 76 } 77 } 78 } 79 PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current"); 80 current = newcurrent; 81 order[i + 1] = current; 82 } 83 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow)); 84 *icol = *irow; 85 PetscCall(PetscObjectReference((PetscObject)*irow)); 86 PetscCall(PetscFree(done)); 87 PetscCall(PetscFree(order)); 88 PetscFunctionReturn(PETSC_SUCCESS); 89 } 90 #endif 91 92 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type) 93 { 94 PetscFunctionBegin; 95 *type = MATSOLVERPETSC; 96 PetscFunctionReturn(PETSC_SUCCESS); 97 } 98 99 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B) 100 { 101 PetscInt n = A->rmap->n; 102 103 PetscFunctionBegin; 104 #if defined(PETSC_USE_COMPLEX) 105 if ((ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 106 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n")); 107 *B = NULL; 108 PetscFunctionReturn(PETSC_SUCCESS); 109 } 110 #endif 111 112 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 113 PetscCall(MatSetSizes(*B, n, n, n, n)); 114 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 115 PetscCall(MatSetType(*B, MATSEQAIJ)); 116 117 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 118 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 119 120 PetscCall(MatSetBlockSizesFromMats(*B, A, A)); 121 PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU])); 122 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU])); 123 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT])); 124 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 125 PetscCall(MatSetType(*B, MATSEQSBAIJ)); 126 PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL)); 127 128 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 129 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 130 PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY])); 131 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC])); 132 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported"); 133 (*B)->factortype = ftype; 134 135 PetscCall(PetscFree((*B)->solvertype)); 136 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype)); 137 (*B)->canuseordering = PETSC_TRUE; 138 PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc)); 139 PetscFunctionReturn(PETSC_SUCCESS); 140 } 141 142 #if 0 143 // currently unused 144 static PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 145 { 146 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 147 IS isicol; 148 const PetscInt *r, *ic; 149 PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j; 150 PetscInt *bi, *bj, *ajtmp; 151 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 152 PetscReal f; 153 PetscInt nlnk, *lnk, k, **bi_ptr; 154 PetscFreeSpaceList free_space = NULL, current_space = NULL; 155 PetscBT lnkbt; 156 PetscBool missing; 157 158 PetscFunctionBegin; 159 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 160 PetscCall(MatMissingDiagonal(A, &missing, &i)); 161 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 162 163 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 164 PetscCall(ISGetIndices(isrow, &r)); 165 PetscCall(ISGetIndices(isicol, &ic)); 166 167 /* get new row pointers */ 168 PetscCall(PetscShmgetAllocateArray(n + 1,sizeof(PetscInt),(void **)&bi)); 169 bi[0] = 0; 170 171 /* bdiag is location of diagonal in factor */ 172 PetscCall(PetscMalloc1(n + 1, &bdiag)); 173 bdiag[0] = 0; 174 175 /* linked list for storing column indices of the active row */ 176 nlnk = n + 1; 177 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 178 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 179 180 /* initial FreeSpace size is f*(ai[n]+1) */ 181 f = info->fill; 182 if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 183 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 184 current_space = free_space; 185 186 for (i = 0; i < n; i++) { 187 /* copy previous fill into linked list */ 188 nzi = 0; 189 nnz = ai[r[i] + 1] - ai[r[i]]; 190 ajtmp = aj + ai[r[i]]; 191 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 192 nzi += nlnk; 193 194 /* add pivot rows into linked list */ 195 row = lnk[n]; 196 while (row < i) { 197 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 198 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 199 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 200 nzi += nlnk; 201 row = lnk[row]; 202 } 203 bi[i + 1] = bi[i] + nzi; 204 im[i] = nzi; 205 206 /* mark bdiag */ 207 nzbd = 0; 208 nnz = nzi; 209 k = lnk[n]; 210 while (nnz-- && k < i) { 211 nzbd++; 212 k = lnk[k]; 213 } 214 bdiag[i] = bi[i] + nzbd; 215 216 /* if free space is not available, make more free space */ 217 if (current_space->local_remaining < nzi) { 218 nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */ 219 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 220 reallocs++; 221 } 222 223 /* copy data into free space, then initialize lnk */ 224 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 225 226 bi_ptr[i] = current_space->array; 227 current_space->array += nzi; 228 current_space->local_used += nzi; 229 current_space->local_remaining -= nzi; 230 } 231 #if defined(PETSC_USE_INFO) 232 if (ai[n] != 0) { 233 PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 234 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 235 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 236 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 237 PetscCall(PetscInfo(A, "for best performance.\n")); 238 } else { 239 PetscCall(PetscInfo(A, "Empty matrix\n")); 240 } 241 #endif 242 243 PetscCall(ISRestoreIndices(isrow, &r)); 244 PetscCall(ISRestoreIndices(isicol, &ic)); 245 246 /* destroy list of free space and other temporary array(s) */ 247 PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscInt),(void **)&bj)); 248 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 249 PetscCall(PetscLLDestroy(lnk, lnkbt)); 250 PetscCall(PetscFree2(bi_ptr, im)); 251 252 /* put together the new matrix */ 253 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 254 b = (Mat_SeqAIJ *)(B)->data; 255 b->free_ij = PETSC_TRUE; 256 PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscScalar),(void **)&b->a)); 257 b->free_a = PETSC_TRUE; 258 b->j = bj; 259 b->i = bi; 260 b->diag = bdiag; 261 b->ilen = NULL; 262 b->imax = NULL; 263 b->row = isrow; 264 b->col = iscol; 265 PetscCall(PetscObjectReference((PetscObject)isrow)); 266 PetscCall(PetscObjectReference((PetscObject)iscol)); 267 b->icol = isicol; 268 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 269 270 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 271 b->maxnz = b->nz = bi[n]; 272 273 (B)->factortype = MAT_FACTOR_LU; 274 (B)->info.factor_mallocs = reallocs; 275 (B)->info.fill_ratio_given = f; 276 277 if (ai[n]) { 278 (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 279 } else { 280 (B)->info.fill_ratio_needed = 0.0; 281 } 282 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 283 if (a->inode.size) (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 284 PetscFunctionReturn(PETSC_SUCCESS); 285 } 286 #endif 287 288 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 289 { 290 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 291 IS isicol; 292 const PetscInt *r, *ic, *ai = a->i, *aj = a->j, *ajtmp; 293 PetscInt i, n = A->rmap->n; 294 PetscInt *bi, *bj; 295 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 296 PetscReal f; 297 PetscInt nlnk, *lnk, k, **bi_ptr; 298 PetscFreeSpaceList free_space = NULL, current_space = NULL; 299 PetscBT lnkbt; 300 PetscBool missing; 301 302 PetscFunctionBegin; 303 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 304 PetscCall(MatMissingDiagonal(A, &missing, &i)); 305 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 306 307 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 308 PetscCall(ISGetIndices(isrow, &r)); 309 PetscCall(ISGetIndices(isicol, &ic)); 310 311 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 312 PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi)); 313 PetscCall(PetscMalloc1(n + 1, &bdiag)); 314 bi[0] = bdiag[0] = 0; 315 316 /* linked list for storing column indices of the active row */ 317 nlnk = n + 1; 318 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 319 320 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 321 322 /* initial FreeSpace size is f*(ai[n]+1) */ 323 f = info->fill; 324 if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 325 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 326 current_space = free_space; 327 328 for (i = 0; i < n; i++) { 329 /* copy previous fill into linked list */ 330 nzi = 0; 331 nnz = ai[r[i] + 1] - ai[r[i]]; 332 ajtmp = aj + ai[r[i]]; 333 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 334 nzi += nlnk; 335 336 /* add pivot rows into linked list */ 337 row = lnk[n]; 338 while (row < i) { 339 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 340 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 341 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 342 nzi += nlnk; 343 row = lnk[row]; 344 } 345 bi[i + 1] = bi[i] + nzi; 346 im[i] = nzi; 347 348 /* mark bdiag */ 349 nzbd = 0; 350 nnz = nzi; 351 k = lnk[n]; 352 while (nnz-- && k < i) { 353 nzbd++; 354 k = lnk[k]; 355 } 356 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 357 358 /* if free space is not available, make more free space */ 359 if (current_space->local_remaining < nzi) { 360 /* estimated additional space needed */ 361 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi)); 362 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 363 reallocs++; 364 } 365 366 /* copy data into free space, then initialize lnk */ 367 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 368 369 bi_ptr[i] = current_space->array; 370 current_space->array += nzi; 371 current_space->local_used += nzi; 372 current_space->local_remaining -= nzi; 373 } 374 375 PetscCall(ISRestoreIndices(isrow, &r)); 376 PetscCall(ISRestoreIndices(isicol, &ic)); 377 378 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 379 PetscCall(PetscShmgetAllocateArray(bi[n] + 1, sizeof(PetscInt), (void **)&bj)); 380 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 381 PetscCall(PetscLLDestroy(lnk, lnkbt)); 382 PetscCall(PetscFree2(bi_ptr, im)); 383 384 /* put together the new matrix */ 385 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 386 b = (Mat_SeqAIJ *)(B)->data; 387 b->free_ij = PETSC_TRUE; 388 PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a)); 389 b->free_a = PETSC_TRUE; 390 b->j = bj; 391 b->i = bi; 392 b->diag = bdiag; 393 b->ilen = NULL; 394 b->imax = NULL; 395 b->row = isrow; 396 b->col = iscol; 397 PetscCall(PetscObjectReference((PetscObject)isrow)); 398 PetscCall(PetscObjectReference((PetscObject)iscol)); 399 b->icol = isicol; 400 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 401 402 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 403 b->maxnz = b->nz = bdiag[0] + 1; 404 405 B->factortype = MAT_FACTOR_LU; 406 B->info.factor_mallocs = reallocs; 407 B->info.fill_ratio_given = f; 408 409 if (ai[n]) { 410 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 411 } else { 412 B->info.fill_ratio_needed = 0.0; 413 } 414 #if defined(PETSC_USE_INFO) 415 if (ai[n] != 0) { 416 PetscReal af = B->info.fill_ratio_needed; 417 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 418 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 419 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 420 PetscCall(PetscInfo(A, "for best performance.\n")); 421 } else { 422 PetscCall(PetscInfo(A, "Empty matrix\n")); 423 } 424 #endif 425 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 426 if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 427 PetscCall(MatSeqAIJCheckInode_FactorLU(B)); 428 PetscFunctionReturn(PETSC_SUCCESS); 429 } 430 431 /* 432 Trouble in factorization, should we dump the original matrix? 433 */ 434 PetscErrorCode MatFactorDumpMatrix(Mat A) 435 { 436 PetscBool flg = PETSC_FALSE; 437 438 PetscFunctionBegin; 439 PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL)); 440 if (flg) { 441 PetscViewer viewer; 442 char filename[PETSC_MAX_PATH_LEN]; 443 444 PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank)); 445 PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer)); 446 PetscCall(MatView(A, viewer)); 447 PetscCall(PetscViewerDestroy(&viewer)); 448 } 449 PetscFunctionReturn(PETSC_SUCCESS); 450 } 451 452 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) 453 { 454 Mat C = B; 455 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 456 IS isrow = b->row, isicol = b->icol; 457 const PetscInt *r, *ic, *ics; 458 const PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag; 459 PetscInt i, j, k, nz, nzL, row, *pj; 460 const PetscInt *ajtmp, *bjtmp; 461 MatScalar *rtmp, *pc, multiplier, *pv; 462 const MatScalar *aa = a->a, *v; 463 PetscBool row_identity, col_identity; 464 FactorShiftCtx sctx; 465 const PetscInt *ddiag; 466 PetscReal rs; 467 MatScalar d; 468 469 PetscFunctionBegin; 470 /* MatPivotSetUp(): initialize shift context sctx */ 471 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 472 473 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 474 ddiag = a->diag; 475 sctx.shift_top = info->zeropivot; 476 for (i = 0; i < n; i++) { 477 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 478 d = (aa)[ddiag[i]]; 479 rs = -PetscAbsScalar(d) - PetscRealPart(d); 480 v = aa + ai[i]; 481 nz = ai[i + 1] - ai[i]; 482 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 483 if (rs > sctx.shift_top) sctx.shift_top = rs; 484 } 485 sctx.shift_top *= 1.1; 486 sctx.nshift_max = 5; 487 sctx.shift_lo = 0.; 488 sctx.shift_hi = 1.; 489 } 490 491 PetscCall(ISGetIndices(isrow, &r)); 492 PetscCall(ISGetIndices(isicol, &ic)); 493 PetscCall(PetscMalloc1(n + 1, &rtmp)); 494 ics = ic; 495 496 do { 497 sctx.newshift = PETSC_FALSE; 498 for (i = 0; i < n; i++) { 499 /* zero rtmp */ 500 /* L part */ 501 nz = bi[i + 1] - bi[i]; 502 bjtmp = bj + bi[i]; 503 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 504 505 /* U part */ 506 nz = bdiag[i] - bdiag[i + 1]; 507 bjtmp = bj + bdiag[i + 1] + 1; 508 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 509 510 /* load in initial (unfactored row) */ 511 nz = ai[r[i] + 1] - ai[r[i]]; 512 ajtmp = aj + ai[r[i]]; 513 v = aa + ai[r[i]]; 514 for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 515 /* ZeropivotApply() */ 516 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 517 518 /* elimination */ 519 bjtmp = bj + bi[i]; 520 row = *bjtmp++; 521 nzL = bi[i + 1] - bi[i]; 522 for (k = 0; k < nzL; k++) { 523 pc = rtmp + row; 524 if (*pc != 0.0) { 525 pv = b->a + bdiag[row]; 526 multiplier = *pc * (*pv); 527 *pc = multiplier; 528 529 pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ 530 pv = b->a + bdiag[row + 1] + 1; 531 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */ 532 533 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 534 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 535 } 536 row = *bjtmp++; 537 } 538 539 /* finished row so stick it into b->a */ 540 rs = 0.0; 541 /* L part */ 542 pv = b->a + bi[i]; 543 pj = b->j + bi[i]; 544 nz = bi[i + 1] - bi[i]; 545 for (j = 0; j < nz; j++) { 546 pv[j] = rtmp[pj[j]]; 547 rs += PetscAbsScalar(pv[j]); 548 } 549 550 /* U part */ 551 pv = b->a + bdiag[i + 1] + 1; 552 pj = b->j + bdiag[i + 1] + 1; 553 nz = bdiag[i] - bdiag[i + 1] - 1; 554 for (j = 0; j < nz; j++) { 555 pv[j] = rtmp[pj[j]]; 556 rs += PetscAbsScalar(pv[j]); 557 } 558 559 sctx.rs = rs; 560 sctx.pv = rtmp[i]; 561 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 562 if (sctx.newshift) break; /* break for-loop */ 563 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 564 565 /* Mark diagonal and invert diagonal for simpler triangular solves */ 566 pv = b->a + bdiag[i]; 567 *pv = 1.0 / rtmp[i]; 568 569 } /* endof for (i=0; i<n; i++) { */ 570 571 /* MatPivotRefine() */ 572 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 573 /* 574 * if no shift in this attempt & shifting & started shifting & can refine, 575 * then try lower shift 576 */ 577 sctx.shift_hi = sctx.shift_fraction; 578 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 579 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 580 sctx.newshift = PETSC_TRUE; 581 sctx.nshift++; 582 } 583 } while (sctx.newshift); 584 585 PetscCall(PetscFree(rtmp)); 586 PetscCall(ISRestoreIndices(isicol, &ic)); 587 PetscCall(ISRestoreIndices(isrow, &r)); 588 589 PetscCall(ISIdentity(isrow, &row_identity)); 590 PetscCall(ISIdentity(isicol, &col_identity)); 591 if (b->inode.size) { 592 C->ops->solve = MatSolve_SeqAIJ_Inode; 593 } else if (row_identity && col_identity) { 594 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 595 } else { 596 C->ops->solve = MatSolve_SeqAIJ; 597 } 598 C->ops->solveadd = MatSolveAdd_SeqAIJ; 599 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 600 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 601 C->ops->matsolve = MatMatSolve_SeqAIJ; 602 C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ; 603 C->assembled = PETSC_TRUE; 604 C->preallocated = PETSC_TRUE; 605 606 PetscCall(PetscLogFlops(C->cmap->n)); 607 608 /* MatShiftView(A,info,&sctx) */ 609 if (sctx.nshift) { 610 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 611 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 612 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 613 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 614 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 615 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 616 } 617 } 618 PetscFunctionReturn(PETSC_SUCCESS); 619 } 620 621 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat); 622 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec); 623 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec); 624 625 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) 626 { 627 Mat C = B; 628 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 629 IS isrow = b->row, isicol = b->icol; 630 const PetscInt *r, *ic, *ics; 631 PetscInt nz, row, i, j, n = A->rmap->n, diag; 632 const PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 633 const PetscInt *ajtmp, *bjtmp, *diag_offset = b->diag, *pj; 634 MatScalar *pv, *rtmp, *pc, multiplier, d; 635 const MatScalar *v, *aa = a->a; 636 PetscReal rs = 0.0; 637 FactorShiftCtx sctx; 638 const PetscInt *ddiag; 639 PetscBool row_identity, col_identity; 640 641 PetscFunctionBegin; 642 /* MatPivotSetUp(): initialize shift context sctx */ 643 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 644 645 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 646 ddiag = a->diag; 647 sctx.shift_top = info->zeropivot; 648 for (i = 0; i < n; i++) { 649 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 650 d = (aa)[ddiag[i]]; 651 rs = -PetscAbsScalar(d) - PetscRealPart(d); 652 v = aa + ai[i]; 653 nz = ai[i + 1] - ai[i]; 654 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 655 if (rs > sctx.shift_top) sctx.shift_top = rs; 656 } 657 sctx.shift_top *= 1.1; 658 sctx.nshift_max = 5; 659 sctx.shift_lo = 0.; 660 sctx.shift_hi = 1.; 661 } 662 663 PetscCall(ISGetIndices(isrow, &r)); 664 PetscCall(ISGetIndices(isicol, &ic)); 665 PetscCall(PetscMalloc1(n + 1, &rtmp)); 666 ics = ic; 667 668 do { 669 sctx.newshift = PETSC_FALSE; 670 for (i = 0; i < n; i++) { 671 nz = bi[i + 1] - bi[i]; 672 bjtmp = bj + bi[i]; 673 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 674 675 /* load in initial (unfactored row) */ 676 nz = ai[r[i] + 1] - ai[r[i]]; 677 ajtmp = aj + ai[r[i]]; 678 v = aa + ai[r[i]]; 679 for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 680 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 681 682 row = *bjtmp++; 683 while (row < i) { 684 pc = rtmp + row; 685 if (*pc != 0.0) { 686 pv = b->a + diag_offset[row]; 687 pj = b->j + diag_offset[row] + 1; 688 multiplier = *pc / *pv++; 689 *pc = multiplier; 690 nz = bi[row + 1] - diag_offset[row] - 1; 691 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 692 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 693 } 694 row = *bjtmp++; 695 } 696 /* finished row so stick it into b->a */ 697 pv = b->a + bi[i]; 698 pj = b->j + bi[i]; 699 nz = bi[i + 1] - bi[i]; 700 diag = diag_offset[i] - bi[i]; 701 rs = 0.0; 702 for (j = 0; j < nz; j++) { 703 pv[j] = rtmp[pj[j]]; 704 rs += PetscAbsScalar(pv[j]); 705 } 706 rs -= PetscAbsScalar(pv[diag]); 707 708 sctx.rs = rs; 709 sctx.pv = pv[diag]; 710 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 711 if (sctx.newshift) break; 712 pv[diag] = sctx.pv; 713 } 714 715 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 716 /* 717 * if no shift in this attempt & shifting & started shifting & can refine, 718 * then try lower shift 719 */ 720 sctx.shift_hi = sctx.shift_fraction; 721 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 722 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 723 sctx.newshift = PETSC_TRUE; 724 sctx.nshift++; 725 } 726 } while (sctx.newshift); 727 728 /* invert diagonal entries for simpler triangular solves */ 729 for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]]; 730 PetscCall(PetscFree(rtmp)); 731 PetscCall(ISRestoreIndices(isicol, &ic)); 732 PetscCall(ISRestoreIndices(isrow, &r)); 733 734 PetscCall(ISIdentity(isrow, &row_identity)); 735 PetscCall(ISIdentity(isicol, &col_identity)); 736 if (row_identity && col_identity) { 737 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 738 } else { 739 C->ops->solve = MatSolve_SeqAIJ_inplace; 740 } 741 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 742 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 743 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 744 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 745 C->ops->matsolvetranspose = NULL; 746 747 C->assembled = PETSC_TRUE; 748 C->preallocated = PETSC_TRUE; 749 750 PetscCall(PetscLogFlops(C->cmap->n)); 751 if (sctx.nshift) { 752 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 753 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 754 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 755 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 756 } 757 } 758 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 759 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 760 761 PetscCall(MatSeqAIJCheckInode(C)); 762 PetscFunctionReturn(PETSC_SUCCESS); 763 } 764 765 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec); 766 767 /* 768 This routine implements inplace ILU(0) with row or/and column permutations. 769 Input: 770 A - original matrix 771 Output; 772 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 773 a->j (col index) is permuted by the inverse of colperm, then sorted 774 a->a reordered accordingly with a->j 775 a->diag (ptr to diagonal elements) is updated. 776 */ 777 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info) 778 { 779 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 780 IS isrow = a->row, isicol = a->icol; 781 const PetscInt *r, *ic, *ics; 782 PetscInt i, j, n = A->rmap->n, *ai = a->i, *aj = a->j; 783 PetscInt *ajtmp, nz, row; 784 PetscInt *diag = a->diag, nbdiag, *pj; 785 PetscScalar *rtmp, *pc, multiplier, d; 786 MatScalar *pv, *v; 787 PetscReal rs; 788 FactorShiftCtx sctx; 789 const MatScalar *aa = a->a, *vtmp; 790 791 PetscFunctionBegin; 792 PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address"); 793 794 /* MatPivotSetUp(): initialize shift context sctx */ 795 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 796 797 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 798 const PetscInt *ddiag = a->diag; 799 sctx.shift_top = info->zeropivot; 800 for (i = 0; i < n; i++) { 801 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 802 d = (aa)[ddiag[i]]; 803 rs = -PetscAbsScalar(d) - PetscRealPart(d); 804 vtmp = aa + ai[i]; 805 nz = ai[i + 1] - ai[i]; 806 for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]); 807 if (rs > sctx.shift_top) sctx.shift_top = rs; 808 } 809 sctx.shift_top *= 1.1; 810 sctx.nshift_max = 5; 811 sctx.shift_lo = 0.; 812 sctx.shift_hi = 1.; 813 } 814 815 PetscCall(ISGetIndices(isrow, &r)); 816 PetscCall(ISGetIndices(isicol, &ic)); 817 PetscCall(PetscMalloc1(n + 1, &rtmp)); 818 PetscCall(PetscArrayzero(rtmp, n + 1)); 819 ics = ic; 820 821 #if defined(MV) 822 sctx.shift_top = 0.; 823 sctx.nshift_max = 0; 824 sctx.shift_lo = 0.; 825 sctx.shift_hi = 0.; 826 sctx.shift_fraction = 0.; 827 828 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 829 sctx.shift_top = 0.; 830 for (i = 0; i < n; i++) { 831 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 832 d = (a->a)[diag[i]]; 833 rs = -PetscAbsScalar(d) - PetscRealPart(d); 834 v = a->a + ai[i]; 835 nz = ai[i + 1] - ai[i]; 836 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 837 if (rs > sctx.shift_top) sctx.shift_top = rs; 838 } 839 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 840 sctx.shift_top *= 1.1; 841 sctx.nshift_max = 5; 842 sctx.shift_lo = 0.; 843 sctx.shift_hi = 1.; 844 } 845 846 sctx.shift_amount = 0.; 847 sctx.nshift = 0; 848 #endif 849 850 do { 851 sctx.newshift = PETSC_FALSE; 852 for (i = 0; i < n; i++) { 853 /* load in initial unfactored row */ 854 nz = ai[r[i] + 1] - ai[r[i]]; 855 ajtmp = aj + ai[r[i]]; 856 v = a->a + ai[r[i]]; 857 /* sort permuted ajtmp and values v accordingly */ 858 for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]]; 859 PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v)); 860 861 diag[r[i]] = ai[r[i]]; 862 for (j = 0; j < nz; j++) { 863 rtmp[ajtmp[j]] = v[j]; 864 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 865 } 866 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 867 868 row = *ajtmp++; 869 while (row < i) { 870 pc = rtmp + row; 871 if (*pc != 0.0) { 872 pv = a->a + diag[r[row]]; 873 pj = aj + diag[r[row]] + 1; 874 875 multiplier = *pc / *pv++; 876 *pc = multiplier; 877 nz = ai[r[row] + 1] - diag[r[row]] - 1; 878 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 879 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 880 } 881 row = *ajtmp++; 882 } 883 /* finished row so overwrite it onto a->a */ 884 pv = a->a + ai[r[i]]; 885 pj = aj + ai[r[i]]; 886 nz = ai[r[i] + 1] - ai[r[i]]; 887 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 888 889 rs = 0.0; 890 for (j = 0; j < nz; j++) { 891 pv[j] = rtmp[pj[j]]; 892 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 893 } 894 895 sctx.rs = rs; 896 sctx.pv = pv[nbdiag]; 897 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 898 if (sctx.newshift) break; 899 pv[nbdiag] = sctx.pv; 900 } 901 902 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 903 /* 904 * if no shift in this attempt & shifting & started shifting & can refine, 905 * then try lower shift 906 */ 907 sctx.shift_hi = sctx.shift_fraction; 908 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 909 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 910 sctx.newshift = PETSC_TRUE; 911 sctx.nshift++; 912 } 913 } while (sctx.newshift); 914 915 /* invert diagonal entries for simpler triangular solves */ 916 for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]]; 917 918 PetscCall(PetscFree(rtmp)); 919 PetscCall(ISRestoreIndices(isicol, &ic)); 920 PetscCall(ISRestoreIndices(isrow, &r)); 921 922 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 923 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 924 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 925 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 926 927 A->assembled = PETSC_TRUE; 928 A->preallocated = PETSC_TRUE; 929 930 PetscCall(PetscLogFlops(A->cmap->n)); 931 if (sctx.nshift) { 932 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 933 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 934 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 935 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 936 } 937 } 938 PetscFunctionReturn(PETSC_SUCCESS); 939 } 940 941 PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info) 942 { 943 Mat C; 944 945 PetscFunctionBegin; 946 PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C)); 947 PetscCall(MatLUFactorSymbolic(C, A, row, col, info)); 948 PetscCall(MatLUFactorNumeric(C, A, info)); 949 950 A->ops->solve = C->ops->solve; 951 A->ops->solvetranspose = C->ops->solvetranspose; 952 953 PetscCall(MatHeaderMerge(A, &C)); 954 PetscFunctionReturn(PETSC_SUCCESS); 955 } 956 957 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) 958 { 959 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 960 IS iscol = a->col, isrow = a->row; 961 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 962 PetscInt nz; 963 const PetscInt *rout, *cout, *r, *c; 964 PetscScalar *x, *tmp, *tmps, sum; 965 const PetscScalar *b; 966 const MatScalar *aa = a->a, *v; 967 968 PetscFunctionBegin; 969 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 970 971 PetscCall(VecGetArrayRead(bb, &b)); 972 PetscCall(VecGetArrayWrite(xx, &x)); 973 tmp = a->solve_work; 974 975 PetscCall(ISGetIndices(isrow, &rout)); 976 r = rout; 977 PetscCall(ISGetIndices(iscol, &cout)); 978 c = cout + (n - 1); 979 980 /* forward solve the lower triangular */ 981 tmp[0] = b[*r++]; 982 tmps = tmp; 983 for (i = 1; i < n; i++) { 984 v = aa + ai[i]; 985 vi = aj + ai[i]; 986 nz = a->diag[i] - ai[i]; 987 sum = b[*r++]; 988 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 989 tmp[i] = sum; 990 } 991 992 /* backward solve the upper triangular */ 993 for (i = n - 1; i >= 0; i--) { 994 v = aa + a->diag[i] + 1; 995 vi = aj + a->diag[i] + 1; 996 nz = ai[i + 1] - a->diag[i] - 1; 997 sum = tmp[i]; 998 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 999 x[*c--] = tmp[i] = sum * aa[a->diag[i]]; 1000 } 1001 1002 PetscCall(ISRestoreIndices(isrow, &rout)); 1003 PetscCall(ISRestoreIndices(iscol, &cout)); 1004 PetscCall(VecRestoreArrayRead(bb, &b)); 1005 PetscCall(VecRestoreArrayWrite(xx, &x)); 1006 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1007 PetscFunctionReturn(PETSC_SUCCESS); 1008 } 1009 1010 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X) 1011 { 1012 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1013 IS iscol = a->col, isrow = a->row; 1014 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 1015 PetscInt nz, neq, ldb, ldx; 1016 const PetscInt *rout, *cout, *r, *c; 1017 PetscScalar *x, *tmp = a->solve_work, *tmps, sum; 1018 const PetscScalar *b, *aa = a->a, *v; 1019 PetscBool isdense; 1020 1021 PetscFunctionBegin; 1022 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1023 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 1024 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1025 if (X != B) { 1026 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 1027 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1028 } 1029 PetscCall(MatDenseGetArrayRead(B, &b)); 1030 PetscCall(MatDenseGetLDA(B, &ldb)); 1031 PetscCall(MatDenseGetArray(X, &x)); 1032 PetscCall(MatDenseGetLDA(X, &ldx)); 1033 PetscCall(ISGetIndices(isrow, &rout)); 1034 r = rout; 1035 PetscCall(ISGetIndices(iscol, &cout)); 1036 c = cout; 1037 for (neq = 0; neq < B->cmap->n; neq++) { 1038 /* forward solve the lower triangular */ 1039 tmp[0] = b[r[0]]; 1040 tmps = tmp; 1041 for (i = 1; i < n; i++) { 1042 v = aa + ai[i]; 1043 vi = aj + ai[i]; 1044 nz = a->diag[i] - ai[i]; 1045 sum = b[r[i]]; 1046 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1047 tmp[i] = sum; 1048 } 1049 /* backward solve the upper triangular */ 1050 for (i = n - 1; i >= 0; i--) { 1051 v = aa + a->diag[i] + 1; 1052 vi = aj + a->diag[i] + 1; 1053 nz = ai[i + 1] - a->diag[i] - 1; 1054 sum = tmp[i]; 1055 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1056 x[c[i]] = tmp[i] = sum * aa[a->diag[i]]; 1057 } 1058 b += ldb; 1059 x += ldx; 1060 } 1061 PetscCall(ISRestoreIndices(isrow, &rout)); 1062 PetscCall(ISRestoreIndices(iscol, &cout)); 1063 PetscCall(MatDenseRestoreArrayRead(B, &b)); 1064 PetscCall(MatDenseRestoreArray(X, &x)); 1065 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 1066 PetscFunctionReturn(PETSC_SUCCESS); 1067 } 1068 1069 PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X) 1070 { 1071 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1072 IS iscol = a->col, isrow = a->row; 1073 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 1074 PetscInt nz, neq, ldb, ldx; 1075 const PetscInt *rout, *cout, *r, *c; 1076 PetscScalar *x, *tmp = a->solve_work, sum; 1077 const PetscScalar *b, *aa = a->a, *v; 1078 PetscBool isdense; 1079 1080 PetscFunctionBegin; 1081 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1082 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 1083 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1084 if (X != B) { 1085 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 1086 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1087 } 1088 PetscCall(MatDenseGetArrayRead(B, &b)); 1089 PetscCall(MatDenseGetLDA(B, &ldb)); 1090 PetscCall(MatDenseGetArray(X, &x)); 1091 PetscCall(MatDenseGetLDA(X, &ldx)); 1092 PetscCall(ISGetIndices(isrow, &rout)); 1093 r = rout; 1094 PetscCall(ISGetIndices(iscol, &cout)); 1095 c = cout; 1096 for (neq = 0; neq < B->cmap->n; neq++) { 1097 /* forward solve the lower triangular */ 1098 tmp[0] = b[r[0]]; 1099 v = aa; 1100 vi = aj; 1101 for (i = 1; i < n; i++) { 1102 nz = ai[i + 1] - ai[i]; 1103 sum = b[r[i]]; 1104 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 1105 tmp[i] = sum; 1106 v += nz; 1107 vi += nz; 1108 } 1109 /* backward solve the upper triangular */ 1110 for (i = n - 1; i >= 0; i--) { 1111 v = aa + adiag[i + 1] + 1; 1112 vi = aj + adiag[i + 1] + 1; 1113 nz = adiag[i] - adiag[i + 1] - 1; 1114 sum = tmp[i]; 1115 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 1116 x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 1117 } 1118 b += ldb; 1119 x += ldx; 1120 } 1121 PetscCall(ISRestoreIndices(isrow, &rout)); 1122 PetscCall(ISRestoreIndices(iscol, &cout)); 1123 PetscCall(MatDenseRestoreArrayRead(B, &b)); 1124 PetscCall(MatDenseRestoreArray(X, &x)); 1125 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 1126 PetscFunctionReturn(PETSC_SUCCESS); 1127 } 1128 1129 PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X) 1130 { 1131 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1132 IS iscol = a->col, isrow = a->row; 1133 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, j; 1134 PetscInt nz, neq, ldb, ldx; 1135 const PetscInt *rout, *cout, *r, *c; 1136 PetscScalar *x, *tmp = a->solve_work, s1; 1137 const PetscScalar *b, *aa = a->a, *v; 1138 PetscBool isdense; 1139 1140 PetscFunctionBegin; 1141 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1142 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 1143 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1144 if (X != B) { 1145 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 1146 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1147 } 1148 PetscCall(MatDenseGetArrayRead(B, &b)); 1149 PetscCall(MatDenseGetLDA(B, &ldb)); 1150 PetscCall(MatDenseGetArray(X, &x)); 1151 PetscCall(MatDenseGetLDA(X, &ldx)); 1152 tmp = a->solve_work; 1153 PetscCall(ISGetIndices(isrow, &rout)); 1154 r = rout; 1155 PetscCall(ISGetIndices(iscol, &cout)); 1156 c = cout; 1157 for (neq = 0; neq < B->cmap->n; neq++) { 1158 /* copy the b into temp work space according to permutation */ 1159 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1160 1161 /* forward solve the U^T */ 1162 for (i = 0; i < n; i++) { 1163 v = aa + adiag[i + 1] + 1; 1164 vi = aj + adiag[i + 1] + 1; 1165 nz = adiag[i] - adiag[i + 1] - 1; 1166 s1 = tmp[i]; 1167 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1168 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1169 tmp[i] = s1; 1170 } 1171 1172 /* backward solve the L^T */ 1173 for (i = n - 1; i >= 0; i--) { 1174 v = aa + ai[i]; 1175 vi = aj + ai[i]; 1176 nz = ai[i + 1] - ai[i]; 1177 s1 = tmp[i]; 1178 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1179 } 1180 1181 /* copy tmp into x according to permutation */ 1182 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1183 b += ldb; 1184 x += ldx; 1185 } 1186 PetscCall(ISRestoreIndices(isrow, &rout)); 1187 PetscCall(ISRestoreIndices(iscol, &cout)); 1188 PetscCall(MatDenseRestoreArrayRead(B, &b)); 1189 PetscCall(MatDenseRestoreArray(X, &x)); 1190 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 1191 PetscFunctionReturn(PETSC_SUCCESS); 1192 } 1193 1194 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx) 1195 { 1196 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1197 IS iscol = a->col, isrow = a->row; 1198 const PetscInt *r, *c, *rout, *cout; 1199 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 1200 PetscInt nz, row; 1201 PetscScalar *x, *tmp, *tmps, sum; 1202 const PetscScalar *b; 1203 const MatScalar *aa = a->a, *v; 1204 1205 PetscFunctionBegin; 1206 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1207 1208 PetscCall(VecGetArrayRead(bb, &b)); 1209 PetscCall(VecGetArrayWrite(xx, &x)); 1210 tmp = a->solve_work; 1211 1212 PetscCall(ISGetIndices(isrow, &rout)); 1213 r = rout; 1214 PetscCall(ISGetIndices(iscol, &cout)); 1215 c = cout + (n - 1); 1216 1217 /* forward solve the lower triangular */ 1218 tmp[0] = b[*r++]; 1219 tmps = tmp; 1220 for (row = 1; row < n; row++) { 1221 i = rout[row]; /* permuted row */ 1222 v = aa + ai[i]; 1223 vi = aj + ai[i]; 1224 nz = a->diag[i] - ai[i]; 1225 sum = b[*r++]; 1226 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1227 tmp[row] = sum; 1228 } 1229 1230 /* backward solve the upper triangular */ 1231 for (row = n - 1; row >= 0; row--) { 1232 i = rout[row]; /* permuted row */ 1233 v = aa + a->diag[i] + 1; 1234 vi = aj + a->diag[i] + 1; 1235 nz = ai[i + 1] - a->diag[i] - 1; 1236 sum = tmp[row]; 1237 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1238 x[*c--] = tmp[row] = sum * aa[a->diag[i]]; 1239 } 1240 1241 PetscCall(ISRestoreIndices(isrow, &rout)); 1242 PetscCall(ISRestoreIndices(iscol, &cout)); 1243 PetscCall(VecRestoreArrayRead(bb, &b)); 1244 PetscCall(VecRestoreArrayWrite(xx, &x)); 1245 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1246 PetscFunctionReturn(PETSC_SUCCESS); 1247 } 1248 1249 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1250 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) 1251 { 1252 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1253 PetscInt n = A->rmap->n; 1254 const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag; 1255 PetscScalar *x; 1256 const PetscScalar *b; 1257 const MatScalar *aa = a->a; 1258 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1259 PetscInt adiag_i, i, nz, ai_i; 1260 const PetscInt *vi; 1261 const MatScalar *v; 1262 PetscScalar sum; 1263 #endif 1264 1265 PetscFunctionBegin; 1266 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1267 1268 PetscCall(VecGetArrayRead(bb, &b)); 1269 PetscCall(VecGetArrayWrite(xx, &x)); 1270 1271 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1272 fortransolveaij_(&n, x, ai, aj, adiag, aa, b); 1273 #else 1274 /* forward solve the lower triangular */ 1275 x[0] = b[0]; 1276 for (i = 1; i < n; i++) { 1277 ai_i = ai[i]; 1278 v = aa + ai_i; 1279 vi = aj + ai_i; 1280 nz = adiag[i] - ai_i; 1281 sum = b[i]; 1282 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1283 x[i] = sum; 1284 } 1285 1286 /* backward solve the upper triangular */ 1287 for (i = n - 1; i >= 0; i--) { 1288 adiag_i = adiag[i]; 1289 v = aa + adiag_i + 1; 1290 vi = aj + adiag_i + 1; 1291 nz = ai[i + 1] - adiag_i - 1; 1292 sum = x[i]; 1293 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1294 x[i] = sum * aa[adiag_i]; 1295 } 1296 #endif 1297 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1298 PetscCall(VecRestoreArrayRead(bb, &b)); 1299 PetscCall(VecRestoreArrayWrite(xx, &x)); 1300 PetscFunctionReturn(PETSC_SUCCESS); 1301 } 1302 1303 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx) 1304 { 1305 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1306 IS iscol = a->col, isrow = a->row; 1307 PetscInt i, n = A->rmap->n, j; 1308 PetscInt nz; 1309 const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j; 1310 PetscScalar *x, *tmp, sum; 1311 const PetscScalar *b; 1312 const MatScalar *aa = a->a, *v; 1313 1314 PetscFunctionBegin; 1315 if (yy != xx) PetscCall(VecCopy(yy, xx)); 1316 1317 PetscCall(VecGetArrayRead(bb, &b)); 1318 PetscCall(VecGetArray(xx, &x)); 1319 tmp = a->solve_work; 1320 1321 PetscCall(ISGetIndices(isrow, &rout)); 1322 r = rout; 1323 PetscCall(ISGetIndices(iscol, &cout)); 1324 c = cout + (n - 1); 1325 1326 /* forward solve the lower triangular */ 1327 tmp[0] = b[*r++]; 1328 for (i = 1; i < n; i++) { 1329 v = aa + ai[i]; 1330 vi = aj + ai[i]; 1331 nz = a->diag[i] - ai[i]; 1332 sum = b[*r++]; 1333 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1334 tmp[i] = sum; 1335 } 1336 1337 /* backward solve the upper triangular */ 1338 for (i = n - 1; i >= 0; i--) { 1339 v = aa + a->diag[i] + 1; 1340 vi = aj + a->diag[i] + 1; 1341 nz = ai[i + 1] - a->diag[i] - 1; 1342 sum = tmp[i]; 1343 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1344 tmp[i] = sum * aa[a->diag[i]]; 1345 x[*c--] += tmp[i]; 1346 } 1347 1348 PetscCall(ISRestoreIndices(isrow, &rout)); 1349 PetscCall(ISRestoreIndices(iscol, &cout)); 1350 PetscCall(VecRestoreArrayRead(bb, &b)); 1351 PetscCall(VecRestoreArray(xx, &x)); 1352 PetscCall(PetscLogFlops(2.0 * a->nz)); 1353 PetscFunctionReturn(PETSC_SUCCESS); 1354 } 1355 1356 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx) 1357 { 1358 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1359 IS iscol = a->col, isrow = a->row; 1360 PetscInt i, n = A->rmap->n, j; 1361 PetscInt nz; 1362 const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 1363 PetscScalar *x, *tmp, sum; 1364 const PetscScalar *b; 1365 const MatScalar *aa = a->a, *v; 1366 1367 PetscFunctionBegin; 1368 if (yy != xx) PetscCall(VecCopy(yy, xx)); 1369 1370 PetscCall(VecGetArrayRead(bb, &b)); 1371 PetscCall(VecGetArray(xx, &x)); 1372 tmp = a->solve_work; 1373 1374 PetscCall(ISGetIndices(isrow, &rout)); 1375 r = rout; 1376 PetscCall(ISGetIndices(iscol, &cout)); 1377 c = cout; 1378 1379 /* forward solve the lower triangular */ 1380 tmp[0] = b[r[0]]; 1381 v = aa; 1382 vi = aj; 1383 for (i = 1; i < n; i++) { 1384 nz = ai[i + 1] - ai[i]; 1385 sum = b[r[i]]; 1386 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1387 tmp[i] = sum; 1388 v += nz; 1389 vi += nz; 1390 } 1391 1392 /* backward solve the upper triangular */ 1393 v = aa + adiag[n - 1]; 1394 vi = aj + adiag[n - 1]; 1395 for (i = n - 1; i >= 0; i--) { 1396 nz = adiag[i] - adiag[i + 1] - 1; 1397 sum = tmp[i]; 1398 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1399 tmp[i] = sum * v[nz]; 1400 x[c[i]] += tmp[i]; 1401 v += nz + 1; 1402 vi += nz + 1; 1403 } 1404 1405 PetscCall(ISRestoreIndices(isrow, &rout)); 1406 PetscCall(ISRestoreIndices(iscol, &cout)); 1407 PetscCall(VecRestoreArrayRead(bb, &b)); 1408 PetscCall(VecRestoreArray(xx, &x)); 1409 PetscCall(PetscLogFlops(2.0 * a->nz)); 1410 PetscFunctionReturn(PETSC_SUCCESS); 1411 } 1412 1413 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) 1414 { 1415 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1416 IS iscol = a->col, isrow = a->row; 1417 const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 1418 PetscInt i, n = A->rmap->n, j; 1419 PetscInt nz; 1420 PetscScalar *x, *tmp, s1; 1421 const MatScalar *aa = a->a, *v; 1422 const PetscScalar *b; 1423 1424 PetscFunctionBegin; 1425 PetscCall(VecGetArrayRead(bb, &b)); 1426 PetscCall(VecGetArrayWrite(xx, &x)); 1427 tmp = a->solve_work; 1428 1429 PetscCall(ISGetIndices(isrow, &rout)); 1430 r = rout; 1431 PetscCall(ISGetIndices(iscol, &cout)); 1432 c = cout; 1433 1434 /* copy the b into temp work space according to permutation */ 1435 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1436 1437 /* forward solve the U^T */ 1438 for (i = 0; i < n; i++) { 1439 v = aa + diag[i]; 1440 vi = aj + diag[i] + 1; 1441 nz = ai[i + 1] - diag[i] - 1; 1442 s1 = tmp[i]; 1443 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1444 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1445 tmp[i] = s1; 1446 } 1447 1448 /* backward solve the L^T */ 1449 for (i = n - 1; i >= 0; i--) { 1450 v = aa + diag[i] - 1; 1451 vi = aj + diag[i] - 1; 1452 nz = diag[i] - ai[i]; 1453 s1 = tmp[i]; 1454 for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1455 } 1456 1457 /* copy tmp into x according to permutation */ 1458 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1459 1460 PetscCall(ISRestoreIndices(isrow, &rout)); 1461 PetscCall(ISRestoreIndices(iscol, &cout)); 1462 PetscCall(VecRestoreArrayRead(bb, &b)); 1463 PetscCall(VecRestoreArrayWrite(xx, &x)); 1464 1465 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1466 PetscFunctionReturn(PETSC_SUCCESS); 1467 } 1468 1469 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx) 1470 { 1471 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1472 IS iscol = a->col, isrow = a->row; 1473 const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi; 1474 PetscInt i, n = A->rmap->n, j; 1475 PetscInt nz; 1476 PetscScalar *x, *tmp, s1; 1477 const MatScalar *aa = a->a, *v; 1478 const PetscScalar *b; 1479 1480 PetscFunctionBegin; 1481 PetscCall(VecGetArrayRead(bb, &b)); 1482 PetscCall(VecGetArrayWrite(xx, &x)); 1483 tmp = a->solve_work; 1484 1485 PetscCall(ISGetIndices(isrow, &rout)); 1486 r = rout; 1487 PetscCall(ISGetIndices(iscol, &cout)); 1488 c = cout; 1489 1490 /* copy the b into temp work space according to permutation */ 1491 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1492 1493 /* forward solve the U^T */ 1494 for (i = 0; i < n; i++) { 1495 v = aa + adiag[i + 1] + 1; 1496 vi = aj + adiag[i + 1] + 1; 1497 nz = adiag[i] - adiag[i + 1] - 1; 1498 s1 = tmp[i]; 1499 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1500 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1501 tmp[i] = s1; 1502 } 1503 1504 /* backward solve the L^T */ 1505 for (i = n - 1; i >= 0; i--) { 1506 v = aa + ai[i]; 1507 vi = aj + ai[i]; 1508 nz = ai[i + 1] - ai[i]; 1509 s1 = tmp[i]; 1510 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1511 } 1512 1513 /* copy tmp into x according to permutation */ 1514 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1515 1516 PetscCall(ISRestoreIndices(isrow, &rout)); 1517 PetscCall(ISRestoreIndices(iscol, &cout)); 1518 PetscCall(VecRestoreArrayRead(bb, &b)); 1519 PetscCall(VecRestoreArrayWrite(xx, &x)); 1520 1521 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1522 PetscFunctionReturn(PETSC_SUCCESS); 1523 } 1524 1525 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx) 1526 { 1527 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1528 IS iscol = a->col, isrow = a->row; 1529 const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 1530 PetscInt i, n = A->rmap->n, j; 1531 PetscInt nz; 1532 PetscScalar *x, *tmp, s1; 1533 const MatScalar *aa = a->a, *v; 1534 const PetscScalar *b; 1535 1536 PetscFunctionBegin; 1537 if (zz != xx) PetscCall(VecCopy(zz, xx)); 1538 PetscCall(VecGetArrayRead(bb, &b)); 1539 PetscCall(VecGetArray(xx, &x)); 1540 tmp = a->solve_work; 1541 1542 PetscCall(ISGetIndices(isrow, &rout)); 1543 r = rout; 1544 PetscCall(ISGetIndices(iscol, &cout)); 1545 c = cout; 1546 1547 /* copy the b into temp work space according to permutation */ 1548 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1549 1550 /* forward solve the U^T */ 1551 for (i = 0; i < n; i++) { 1552 v = aa + diag[i]; 1553 vi = aj + diag[i] + 1; 1554 nz = ai[i + 1] - diag[i] - 1; 1555 s1 = tmp[i]; 1556 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1557 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1558 tmp[i] = s1; 1559 } 1560 1561 /* backward solve the L^T */ 1562 for (i = n - 1; i >= 0; i--) { 1563 v = aa + diag[i] - 1; 1564 vi = aj + diag[i] - 1; 1565 nz = diag[i] - ai[i]; 1566 s1 = tmp[i]; 1567 for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1568 } 1569 1570 /* copy tmp into x according to permutation */ 1571 for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1572 1573 PetscCall(ISRestoreIndices(isrow, &rout)); 1574 PetscCall(ISRestoreIndices(iscol, &cout)); 1575 PetscCall(VecRestoreArrayRead(bb, &b)); 1576 PetscCall(VecRestoreArray(xx, &x)); 1577 1578 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1579 PetscFunctionReturn(PETSC_SUCCESS); 1580 } 1581 1582 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx) 1583 { 1584 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1585 IS iscol = a->col, isrow = a->row; 1586 const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi; 1587 PetscInt i, n = A->rmap->n, j; 1588 PetscInt nz; 1589 PetscScalar *x, *tmp, s1; 1590 const MatScalar *aa = a->a, *v; 1591 const PetscScalar *b; 1592 1593 PetscFunctionBegin; 1594 if (zz != xx) PetscCall(VecCopy(zz, xx)); 1595 PetscCall(VecGetArrayRead(bb, &b)); 1596 PetscCall(VecGetArray(xx, &x)); 1597 tmp = a->solve_work; 1598 1599 PetscCall(ISGetIndices(isrow, &rout)); 1600 r = rout; 1601 PetscCall(ISGetIndices(iscol, &cout)); 1602 c = cout; 1603 1604 /* copy the b into temp work space according to permutation */ 1605 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1606 1607 /* forward solve the U^T */ 1608 for (i = 0; i < n; i++) { 1609 v = aa + adiag[i + 1] + 1; 1610 vi = aj + adiag[i + 1] + 1; 1611 nz = adiag[i] - adiag[i + 1] - 1; 1612 s1 = tmp[i]; 1613 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1614 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1615 tmp[i] = s1; 1616 } 1617 1618 /* backward solve the L^T */ 1619 for (i = n - 1; i >= 0; i--) { 1620 v = aa + ai[i]; 1621 vi = aj + ai[i]; 1622 nz = ai[i + 1] - ai[i]; 1623 s1 = tmp[i]; 1624 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1625 } 1626 1627 /* copy tmp into x according to permutation */ 1628 for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1629 1630 PetscCall(ISRestoreIndices(isrow, &rout)); 1631 PetscCall(ISRestoreIndices(iscol, &cout)); 1632 PetscCall(VecRestoreArrayRead(bb, &b)); 1633 PetscCall(VecRestoreArray(xx, &x)); 1634 1635 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1636 PetscFunctionReturn(PETSC_SUCCESS); 1637 } 1638 1639 /* 1640 ilu() under revised new data structure. 1641 Factored arrays bj and ba are stored as 1642 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1643 1644 bi=fact->i is an array of size n+1, in which 1645 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1646 bi[n]: points to L(n-1,n-1)+1 1647 1648 bdiag=fact->diag is an array of size n+1,in which 1649 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1650 bdiag[n]: points to entry of U(n-1,0)-1 1651 1652 U(i,:) contains bdiag[i] as its last entry, i.e., 1653 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1654 */ 1655 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1656 { 1657 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1658 const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag; 1659 PetscInt i, j, k = 0, nz, *bi, *bj, *bdiag; 1660 IS isicol; 1661 1662 PetscFunctionBegin; 1663 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1664 PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE)); 1665 b = (Mat_SeqAIJ *)(fact)->data; 1666 1667 /* allocate matrix arrays for new data structure */ 1668 PetscCall(PetscShmgetAllocateArray(ai[n] + 1, sizeof(PetscScalar), (void **)&b->a)); 1669 PetscCall(PetscShmgetAllocateArray(ai[n] + 1, sizeof(PetscInt), (void **)&b->j)); 1670 PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&b->i)); 1671 if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n])); 1672 b->free_a = PETSC_TRUE; 1673 b->free_ij = PETSC_TRUE; 1674 1675 if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag)); 1676 bdiag = b->diag; 1677 1678 /* set bi and bj with new data structure */ 1679 bi = b->i; 1680 bj = b->j; 1681 1682 /* L part */ 1683 bi[0] = 0; 1684 for (i = 0; i < n; i++) { 1685 nz = adiag[i] - ai[i]; 1686 bi[i + 1] = bi[i] + nz; 1687 aj = a->j + ai[i]; 1688 for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1689 } 1690 1691 /* U part */ 1692 bdiag[n] = bi[n] - 1; 1693 for (i = n - 1; i >= 0; i--) { 1694 nz = ai[i + 1] - adiag[i] - 1; 1695 aj = a->j + adiag[i] + 1; 1696 for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1697 /* diag[i] */ 1698 bj[k++] = i; 1699 bdiag[i] = bdiag[i + 1] + nz + 1; 1700 } 1701 1702 fact->factortype = MAT_FACTOR_ILU; 1703 fact->info.factor_mallocs = 0; 1704 fact->info.fill_ratio_given = info->fill; 1705 fact->info.fill_ratio_needed = 1.0; 1706 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1707 PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1708 1709 b = (Mat_SeqAIJ *)(fact)->data; 1710 b->row = isrow; 1711 b->col = iscol; 1712 b->icol = isicol; 1713 PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work)); 1714 PetscCall(PetscObjectReference((PetscObject)isrow)); 1715 PetscCall(PetscObjectReference((PetscObject)iscol)); 1716 PetscFunctionReturn(PETSC_SUCCESS); 1717 } 1718 1719 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1720 { 1721 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1722 IS isicol; 1723 const PetscInt *r, *ic; 1724 PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 1725 PetscInt *bi, *cols, nnz, *cols_lvl; 1726 PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 1727 PetscInt i, levels, diagonal_fill; 1728 PetscBool col_identity, row_identity, missing; 1729 PetscReal f; 1730 PetscInt nlnk, *lnk, *lnk_lvl = NULL; 1731 PetscBT lnkbt; 1732 PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 1733 PetscFreeSpaceList free_space = NULL, current_space = NULL; 1734 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1735 1736 PetscFunctionBegin; 1737 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 1738 PetscCall(MatMissingDiagonal(A, &missing, &i)); 1739 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 1740 1741 levels = (PetscInt)info->levels; 1742 PetscCall(ISIdentity(isrow, &row_identity)); 1743 PetscCall(ISIdentity(iscol, &col_identity)); 1744 if (!levels && row_identity && col_identity) { 1745 /* special case: ilu(0) with natural ordering */ 1746 PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info)); 1747 if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1748 PetscFunctionReturn(PETSC_SUCCESS); 1749 } 1750 1751 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1752 PetscCall(ISGetIndices(isrow, &r)); 1753 PetscCall(ISGetIndices(isicol, &ic)); 1754 1755 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1756 PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi)); 1757 PetscCall(PetscMalloc1(n + 1, &bdiag)); 1758 bi[0] = bdiag[0] = 0; 1759 PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 1760 1761 /* create a linked list for storing column indices of the active row */ 1762 nlnk = n + 1; 1763 PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 1764 1765 /* initial FreeSpace size is f*(ai[n]+1) */ 1766 f = info->fill; 1767 diagonal_fill = (PetscInt)info->diagonal_fill; 1768 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1769 current_space = free_space; 1770 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 1771 current_space_lvl = free_space_lvl; 1772 for (i = 0; i < n; i++) { 1773 nzi = 0; 1774 /* copy current row into linked list */ 1775 nnz = ai[r[i] + 1] - ai[r[i]]; 1776 PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 1777 cols = aj + ai[r[i]]; 1778 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1779 PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 1780 nzi += nlnk; 1781 1782 /* make sure diagonal entry is included */ 1783 if (diagonal_fill && lnk[i] == -1) { 1784 fm = n; 1785 while (lnk[fm] < i) fm = lnk[fm]; 1786 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1787 lnk[fm] = i; 1788 lnk_lvl[i] = 0; 1789 nzi++; 1790 dcount++; 1791 } 1792 1793 /* add pivot rows into the active row */ 1794 nzbd = 0; 1795 prow = lnk[n]; 1796 while (prow < i) { 1797 nnz = bdiag[prow]; 1798 cols = bj_ptr[prow] + nnz + 1; 1799 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1800 nnz = bi[prow + 1] - bi[prow] - nnz - 1; 1801 PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 1802 nzi += nlnk; 1803 prow = lnk[prow]; 1804 nzbd++; 1805 } 1806 bdiag[i] = nzbd; 1807 bi[i + 1] = bi[i] + nzi; 1808 /* if free space is not available, make more free space */ 1809 if (current_space->local_remaining < nzi) { 1810 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */ 1811 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 1812 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 1813 reallocs++; 1814 } 1815 1816 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1817 PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 1818 bj_ptr[i] = current_space->array; 1819 bjlvl_ptr[i] = current_space_lvl->array; 1820 1821 /* make sure the active row i has diagonal entry */ 1822 PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 1823 1824 current_space->array += nzi; 1825 current_space->local_used += nzi; 1826 current_space->local_remaining -= nzi; 1827 current_space_lvl->array += nzi; 1828 current_space_lvl->local_used += nzi; 1829 current_space_lvl->local_remaining -= nzi; 1830 } 1831 1832 PetscCall(ISRestoreIndices(isrow, &r)); 1833 PetscCall(ISRestoreIndices(isicol, &ic)); 1834 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1835 PetscCall(PetscShmgetAllocateArray(bi[n] + 1, sizeof(PetscInt), (void **)&bj)); 1836 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 1837 1838 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 1839 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 1840 PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 1841 1842 #if defined(PETSC_USE_INFO) 1843 { 1844 PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 1845 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 1846 PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 1847 PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 1848 PetscCall(PetscInfo(A, "for best performance.\n")); 1849 if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 1850 } 1851 #endif 1852 /* put together the new matrix */ 1853 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 1854 b = (Mat_SeqAIJ *)(fact)->data; 1855 b->free_ij = PETSC_TRUE; 1856 PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a)); 1857 b->free_a = PETSC_TRUE; 1858 b->j = bj; 1859 b->i = bi; 1860 b->diag = bdiag; 1861 b->ilen = NULL; 1862 b->imax = NULL; 1863 b->row = isrow; 1864 b->col = iscol; 1865 PetscCall(PetscObjectReference((PetscObject)isrow)); 1866 PetscCall(PetscObjectReference((PetscObject)iscol)); 1867 b->icol = isicol; 1868 1869 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 1870 /* In b structure: Free imax, ilen, old a, old j. 1871 Allocate bdiag, solve_work, new a, new j */ 1872 b->maxnz = b->nz = bdiag[0] + 1; 1873 1874 (fact)->info.factor_mallocs = reallocs; 1875 (fact)->info.fill_ratio_given = f; 1876 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 1877 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1878 if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1879 PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1880 PetscFunctionReturn(PETSC_SUCCESS); 1881 } 1882 1883 #if 0 1884 // unused 1885 static PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1886 { 1887 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1888 IS isicol; 1889 const PetscInt *r, *ic; 1890 PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 1891 PetscInt *bi, *cols, nnz, *cols_lvl; 1892 PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 1893 PetscInt i, levels, diagonal_fill; 1894 PetscBool col_identity, row_identity; 1895 PetscReal f; 1896 PetscInt nlnk, *lnk, *lnk_lvl = NULL; 1897 PetscBT lnkbt; 1898 PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 1899 PetscFreeSpaceList free_space = NULL, current_space = NULL; 1900 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1901 PetscBool missing; 1902 1903 PetscFunctionBegin; 1904 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 1905 PetscCall(MatMissingDiagonal(A, &missing, &i)); 1906 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 1907 1908 f = info->fill; 1909 levels = (PetscInt)info->levels; 1910 diagonal_fill = (PetscInt)info->diagonal_fill; 1911 1912 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1913 1914 PetscCall(ISIdentity(isrow, &row_identity)); 1915 PetscCall(ISIdentity(iscol, &col_identity)); 1916 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1917 PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE)); 1918 1919 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1920 if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1921 fact->factortype = MAT_FACTOR_ILU; 1922 (fact)->info.factor_mallocs = 0; 1923 (fact)->info.fill_ratio_given = info->fill; 1924 (fact)->info.fill_ratio_needed = 1.0; 1925 1926 b = (Mat_SeqAIJ *)(fact)->data; 1927 b->row = isrow; 1928 b->col = iscol; 1929 b->icol = isicol; 1930 PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work)); 1931 PetscCall(PetscObjectReference((PetscObject)isrow)); 1932 PetscCall(PetscObjectReference((PetscObject)iscol)); 1933 PetscFunctionReturn(PETSC_SUCCESS); 1934 } 1935 1936 PetscCall(ISGetIndices(isrow, &r)); 1937 PetscCall(ISGetIndices(isicol, &ic)); 1938 1939 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1940 PetscCall(PetscShmgetAllocateArray(n + 1,sizeof(PetscInt),(void **)&bi)); 1941 PetscCall(PetscMalloc1(n + 1, &bdiag)); 1942 bi[0] = bdiag[0] = 0; 1943 1944 PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 1945 1946 /* create a linked list for storing column indices of the active row */ 1947 nlnk = n + 1; 1948 PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 1949 1950 /* initial FreeSpace size is f*(ai[n]+1) */ 1951 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1952 current_space = free_space; 1953 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 1954 current_space_lvl = free_space_lvl; 1955 1956 for (i = 0; i < n; i++) { 1957 nzi = 0; 1958 /* copy current row into linked list */ 1959 nnz = ai[r[i] + 1] - ai[r[i]]; 1960 PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 1961 cols = aj + ai[r[i]]; 1962 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1963 PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 1964 nzi += nlnk; 1965 1966 /* make sure diagonal entry is included */ 1967 if (diagonal_fill && lnk[i] == -1) { 1968 fm = n; 1969 while (lnk[fm] < i) fm = lnk[fm]; 1970 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1971 lnk[fm] = i; 1972 lnk_lvl[i] = 0; 1973 nzi++; 1974 dcount++; 1975 } 1976 1977 /* add pivot rows into the active row */ 1978 nzbd = 0; 1979 prow = lnk[n]; 1980 while (prow < i) { 1981 nnz = bdiag[prow]; 1982 cols = bj_ptr[prow] + nnz + 1; 1983 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1984 nnz = bi[prow + 1] - bi[prow] - nnz - 1; 1985 PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 1986 nzi += nlnk; 1987 prow = lnk[prow]; 1988 nzbd++; 1989 } 1990 bdiag[i] = nzbd; 1991 bi[i + 1] = bi[i] + nzi; 1992 1993 /* if free space is not available, make more free space */ 1994 if (current_space->local_remaining < nzi) { 1995 nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */ 1996 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 1997 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 1998 reallocs++; 1999 } 2000 2001 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2002 PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 2003 bj_ptr[i] = current_space->array; 2004 bjlvl_ptr[i] = current_space_lvl->array; 2005 2006 /* make sure the active row i has diagonal entry */ 2007 PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 2008 2009 current_space->array += nzi; 2010 current_space->local_used += nzi; 2011 current_space->local_remaining -= nzi; 2012 current_space_lvl->array += nzi; 2013 current_space_lvl->local_used += nzi; 2014 current_space_lvl->local_remaining -= nzi; 2015 } 2016 2017 PetscCall(ISRestoreIndices(isrow, &r)); 2018 PetscCall(ISRestoreIndices(isicol, &ic)); 2019 2020 /* destroy list of free space and other temporary arrays */ 2021 PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscInt),(void **)&bj)); 2022 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */ 2023 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 2024 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2025 PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 2026 2027 #if defined(PETSC_USE_INFO) 2028 { 2029 PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 2030 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 2031 PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 2032 PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 2033 PetscCall(PetscInfo(A, "for best performance.\n")); 2034 if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 2035 } 2036 #endif 2037 2038 /* put together the new matrix */ 2039 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 2040 b = (Mat_SeqAIJ *)(fact)->data; 2041 b->free_ij = PETSC_TRUE; 2042 PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscScalar),(void **)&b->a)); 2043 b->free_a = PETSC_TRUE; 2044 b->j = bj; 2045 b->i = bi; 2046 for (i = 0; i < n; i++) bdiag[i] += bi[i]; 2047 b->diag = bdiag; 2048 b->ilen = NULL; 2049 b->imax = NULL; 2050 b->row = isrow; 2051 b->col = iscol; 2052 PetscCall(PetscObjectReference((PetscObject)isrow)); 2053 PetscCall(PetscObjectReference((PetscObject)iscol)); 2054 b->icol = isicol; 2055 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 2056 /* In b structure: Free imax, ilen, old a, old j. 2057 Allocate bdiag, solve_work, new a, new j */ 2058 b->maxnz = b->nz = bi[n]; 2059 2060 (fact)->info.factor_mallocs = reallocs; 2061 (fact)->info.fill_ratio_given = f; 2062 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 2063 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2064 if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2065 PetscFunctionReturn(PETSC_SUCCESS); 2066 } 2067 #endif 2068 2069 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) 2070 { 2071 Mat C = B; 2072 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2073 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 2074 IS ip = b->row, iip = b->icol; 2075 const PetscInt *rip, *riip; 2076 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp; 2077 PetscInt *ai = a->i, *aj = a->j; 2078 PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz; 2079 MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 2080 PetscBool perm_identity; 2081 FactorShiftCtx sctx; 2082 PetscReal rs; 2083 MatScalar d, *v; 2084 2085 PetscFunctionBegin; 2086 /* MatPivotSetUp(): initialize shift context sctx */ 2087 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 2088 2089 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2090 sctx.shift_top = info->zeropivot; 2091 for (i = 0; i < mbs; i++) { 2092 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2093 d = (aa)[a->diag[i]]; 2094 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2095 v = aa + ai[i]; 2096 nz = ai[i + 1] - ai[i]; 2097 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 2098 if (rs > sctx.shift_top) sctx.shift_top = rs; 2099 } 2100 sctx.shift_top *= 1.1; 2101 sctx.nshift_max = 5; 2102 sctx.shift_lo = 0.; 2103 sctx.shift_hi = 1.; 2104 } 2105 2106 PetscCall(ISGetIndices(ip, &rip)); 2107 PetscCall(ISGetIndices(iip, &riip)); 2108 2109 /* allocate working arrays 2110 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2111 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2112 */ 2113 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r)); 2114 2115 do { 2116 sctx.newshift = PETSC_FALSE; 2117 2118 for (i = 0; i < mbs; i++) c2r[i] = mbs; 2119 if (mbs) il[0] = 0; 2120 2121 for (k = 0; k < mbs; k++) { 2122 /* zero rtmp */ 2123 nz = bi[k + 1] - bi[k]; 2124 bjtmp = bj + bi[k]; 2125 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 2126 2127 /* load in initial unfactored row */ 2128 bval = ba + bi[k]; 2129 jmin = ai[rip[k]]; 2130 jmax = ai[rip[k] + 1]; 2131 for (j = jmin; j < jmax; j++) { 2132 col = riip[aj[j]]; 2133 if (col >= k) { /* only take upper triangular entry */ 2134 rtmp[col] = aa[j]; 2135 *bval++ = 0.0; /* for in-place factorization */ 2136 } 2137 } 2138 /* shift the diagonal of the matrix: ZeropivotApply() */ 2139 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2140 2141 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2142 dk = rtmp[k]; 2143 i = c2r[k]; /* first row to be added to k_th row */ 2144 2145 while (i < k) { 2146 nexti = c2r[i]; /* next row to be added to k_th row */ 2147 2148 /* compute multiplier, update diag(k) and U(i,k) */ 2149 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2150 uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */ 2151 dk += uikdi * ba[ili]; /* update diag[k] */ 2152 ba[ili] = uikdi; /* -U(i,k) */ 2153 2154 /* add multiple of row i to k-th row */ 2155 jmin = ili + 1; 2156 jmax = bi[i + 1]; 2157 if (jmin < jmax) { 2158 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 2159 /* update il and c2r for row i */ 2160 il[i] = jmin; 2161 j = bj[jmin]; 2162 c2r[i] = c2r[j]; 2163 c2r[j] = i; 2164 } 2165 i = nexti; 2166 } 2167 2168 /* copy data into U(k,:) */ 2169 rs = 0.0; 2170 jmin = bi[k]; 2171 jmax = bi[k + 1] - 1; 2172 if (jmin < jmax) { 2173 for (j = jmin; j < jmax; j++) { 2174 col = bj[j]; 2175 ba[j] = rtmp[col]; 2176 rs += PetscAbsScalar(ba[j]); 2177 } 2178 /* add the k-th row into il and c2r */ 2179 il[k] = jmin; 2180 i = bj[jmin]; 2181 c2r[k] = c2r[i]; 2182 c2r[i] = k; 2183 } 2184 2185 /* MatPivotCheck() */ 2186 sctx.rs = rs; 2187 sctx.pv = dk; 2188 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 2189 if (sctx.newshift) break; 2190 dk = sctx.pv; 2191 2192 ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */ 2193 } 2194 } while (sctx.newshift); 2195 2196 PetscCall(PetscFree3(rtmp, il, c2r)); 2197 PetscCall(ISRestoreIndices(ip, &rip)); 2198 PetscCall(ISRestoreIndices(iip, &riip)); 2199 2200 PetscCall(ISIdentity(ip, &perm_identity)); 2201 if (perm_identity) { 2202 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2203 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2204 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2205 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2206 } else { 2207 B->ops->solve = MatSolve_SeqSBAIJ_1; 2208 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2209 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2210 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2211 } 2212 2213 C->assembled = PETSC_TRUE; 2214 C->preallocated = PETSC_TRUE; 2215 2216 PetscCall(PetscLogFlops(C->rmap->n)); 2217 2218 /* MatPivotView() */ 2219 if (sctx.nshift) { 2220 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2221 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 2222 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2223 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2224 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 2225 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 2226 } 2227 } 2228 PetscFunctionReturn(PETSC_SUCCESS); 2229 } 2230 2231 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) 2232 { 2233 Mat C = B; 2234 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2235 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 2236 IS ip = b->row, iip = b->icol; 2237 const PetscInt *rip, *riip; 2238 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp; 2239 PetscInt *ai = a->i, *aj = a->j; 2240 PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz; 2241 MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 2242 PetscBool perm_identity; 2243 FactorShiftCtx sctx; 2244 PetscReal rs; 2245 MatScalar d, *v; 2246 2247 PetscFunctionBegin; 2248 /* MatPivotSetUp(): initialize shift context sctx */ 2249 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 2250 2251 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2252 sctx.shift_top = info->zeropivot; 2253 for (i = 0; i < mbs; i++) { 2254 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2255 d = (aa)[a->diag[i]]; 2256 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2257 v = aa + ai[i]; 2258 nz = ai[i + 1] - ai[i]; 2259 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 2260 if (rs > sctx.shift_top) sctx.shift_top = rs; 2261 } 2262 sctx.shift_top *= 1.1; 2263 sctx.nshift_max = 5; 2264 sctx.shift_lo = 0.; 2265 sctx.shift_hi = 1.; 2266 } 2267 2268 PetscCall(ISGetIndices(ip, &rip)); 2269 PetscCall(ISGetIndices(iip, &riip)); 2270 2271 /* initialization */ 2272 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl)); 2273 2274 do { 2275 sctx.newshift = PETSC_FALSE; 2276 2277 for (i = 0; i < mbs; i++) jl[i] = mbs; 2278 il[0] = 0; 2279 2280 for (k = 0; k < mbs; k++) { 2281 /* zero rtmp */ 2282 nz = bi[k + 1] - bi[k]; 2283 bjtmp = bj + bi[k]; 2284 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 2285 2286 bval = ba + bi[k]; 2287 /* initialize k-th row by the perm[k]-th row of A */ 2288 jmin = ai[rip[k]]; 2289 jmax = ai[rip[k] + 1]; 2290 for (j = jmin; j < jmax; j++) { 2291 col = riip[aj[j]]; 2292 if (col >= k) { /* only take upper triangular entry */ 2293 rtmp[col] = aa[j]; 2294 *bval++ = 0.0; /* for in-place factorization */ 2295 } 2296 } 2297 /* shift the diagonal of the matrix */ 2298 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2299 2300 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2301 dk = rtmp[k]; 2302 i = jl[k]; /* first row to be added to k_th row */ 2303 2304 while (i < k) { 2305 nexti = jl[i]; /* next row to be added to k_th row */ 2306 2307 /* compute multiplier, update diag(k) and U(i,k) */ 2308 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2309 uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */ 2310 dk += uikdi * ba[ili]; 2311 ba[ili] = uikdi; /* -U(i,k) */ 2312 2313 /* add multiple of row i to k-th row */ 2314 jmin = ili + 1; 2315 jmax = bi[i + 1]; 2316 if (jmin < jmax) { 2317 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 2318 /* update il and jl for row i */ 2319 il[i] = jmin; 2320 j = bj[jmin]; 2321 jl[i] = jl[j]; 2322 jl[j] = i; 2323 } 2324 i = nexti; 2325 } 2326 2327 /* shift the diagonals when zero pivot is detected */ 2328 /* compute rs=sum of abs(off-diagonal) */ 2329 rs = 0.0; 2330 jmin = bi[k] + 1; 2331 nz = bi[k + 1] - jmin; 2332 bcol = bj + jmin; 2333 for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]); 2334 2335 sctx.rs = rs; 2336 sctx.pv = dk; 2337 PetscCall(MatPivotCheck(B, A, info, &sctx, k)); 2338 if (sctx.newshift) break; 2339 dk = sctx.pv; 2340 2341 /* copy data into U(k,:) */ 2342 ba[bi[k]] = 1.0 / dk; /* U(k,k) */ 2343 jmin = bi[k] + 1; 2344 jmax = bi[k + 1]; 2345 if (jmin < jmax) { 2346 for (j = jmin; j < jmax; j++) { 2347 col = bj[j]; 2348 ba[j] = rtmp[col]; 2349 } 2350 /* add the k-th row into il and jl */ 2351 il[k] = jmin; 2352 i = bj[jmin]; 2353 jl[k] = jl[i]; 2354 jl[i] = k; 2355 } 2356 } 2357 } while (sctx.newshift); 2358 2359 PetscCall(PetscFree3(rtmp, il, jl)); 2360 PetscCall(ISRestoreIndices(ip, &rip)); 2361 PetscCall(ISRestoreIndices(iip, &riip)); 2362 2363 PetscCall(ISIdentity(ip, &perm_identity)); 2364 if (perm_identity) { 2365 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2366 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2367 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2368 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2369 } else { 2370 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2371 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2372 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2373 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2374 } 2375 2376 C->assembled = PETSC_TRUE; 2377 C->preallocated = PETSC_TRUE; 2378 2379 PetscCall(PetscLogFlops(C->rmap->n)); 2380 if (sctx.nshift) { 2381 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2382 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2383 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2384 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2385 } 2386 } 2387 PetscFunctionReturn(PETSC_SUCCESS); 2388 } 2389 2390 /* 2391 icc() under revised new data structure. 2392 Factored arrays bj and ba are stored as 2393 U(0,:),...,U(i,:),U(n-1,:) 2394 2395 ui=fact->i is an array of size n+1, in which 2396 ui+ 2397 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2398 ui[n]: points to U(n-1,n-1)+1 2399 2400 udiag=fact->diag is an array of size n,in which 2401 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2402 2403 U(i,:) contains udiag[i] as its last entry, i.e., 2404 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2405 */ 2406 2407 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2408 { 2409 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2410 Mat_SeqSBAIJ *b; 2411 PetscBool perm_identity, missing; 2412 PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 2413 const PetscInt *rip, *riip; 2414 PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 2415 PetscInt nlnk, *lnk, *lnk_lvl = NULL, d; 2416 PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 2417 PetscReal fill = info->fill, levels = info->levels; 2418 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2419 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 2420 PetscBT lnkbt; 2421 IS iperm; 2422 2423 PetscFunctionBegin; 2424 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2425 PetscCall(MatMissingDiagonal(A, &missing, &d)); 2426 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 2427 PetscCall(ISIdentity(perm, &perm_identity)); 2428 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2429 2430 PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui)); 2431 PetscCall(PetscMalloc1(am + 1, &udiag)); 2432 ui[0] = 0; 2433 2434 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2435 if (!levels && perm_identity) { 2436 for (i = 0; i < am; i++) { 2437 ncols = ai[i + 1] - a->diag[i]; 2438 ui[i + 1] = ui[i] + ncols; 2439 udiag[i] = ui[i + 1] - 1; /* points to the last entry of U(i,:) */ 2440 } 2441 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2442 cols = uj; 2443 for (i = 0; i < am; i++) { 2444 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2445 ncols = ai[i + 1] - a->diag[i] - 1; 2446 for (j = 0; j < ncols; j++) *cols++ = aj[j]; 2447 *cols++ = i; /* diagonal is located as the last entry of U(i,:) */ 2448 } 2449 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2450 PetscCall(ISGetIndices(iperm, &riip)); 2451 PetscCall(ISGetIndices(perm, &rip)); 2452 2453 /* initialization */ 2454 PetscCall(PetscMalloc1(am + 1, &ajtmp)); 2455 2456 /* jl: linked list for storing indices of the pivot rows 2457 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2458 PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 2459 for (i = 0; i < am; i++) { 2460 jl[i] = am; 2461 il[i] = 0; 2462 } 2463 2464 /* create and initialize a linked list for storing column indices of the active row k */ 2465 nlnk = am + 1; 2466 PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 2467 2468 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2469 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 2470 current_space = free_space; 2471 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl)); 2472 current_space_lvl = free_space_lvl; 2473 2474 for (k = 0; k < am; k++) { /* for each active row k */ 2475 /* initialize lnk by the column indices of row rip[k] of A */ 2476 nzk = 0; 2477 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2478 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2479 ncols_upper = 0; 2480 for (j = 0; j < ncols; j++) { 2481 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2482 if (riip[i] >= k) { /* only take upper triangular entry */ 2483 ajtmp[ncols_upper] = i; 2484 ncols_upper++; 2485 } 2486 } 2487 PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 2488 nzk += nlnk; 2489 2490 /* update lnk by computing fill-in for each pivot row to be merged in */ 2491 prow = jl[k]; /* 1st pivot row */ 2492 2493 while (prow < k) { 2494 nextprow = jl[prow]; 2495 2496 /* merge prow into k-th row */ 2497 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2498 jmax = ui[prow + 1]; 2499 ncols = jmax - jmin; 2500 i = jmin - ui[prow]; 2501 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2502 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2503 j = *(uj - 1); 2504 PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 2505 nzk += nlnk; 2506 2507 /* update il and jl for prow */ 2508 if (jmin < jmax) { 2509 il[prow] = jmin; 2510 j = *cols; 2511 jl[prow] = jl[j]; 2512 jl[j] = prow; 2513 } 2514 prow = nextprow; 2515 } 2516 2517 /* if free space is not available, make more free space */ 2518 if (current_space->local_remaining < nzk) { 2519 i = am - k + 1; /* num of unfactored rows */ 2520 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2521 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2522 PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 2523 reallocs++; 2524 } 2525 2526 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2527 PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 2528 PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 2529 2530 /* add the k-th row into il and jl */ 2531 if (nzk > 1) { 2532 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2533 jl[k] = jl[i]; 2534 jl[i] = k; 2535 il[k] = ui[k] + 1; 2536 } 2537 uj_ptr[k] = current_space->array; 2538 uj_lvl_ptr[k] = current_space_lvl->array; 2539 2540 current_space->array += nzk; 2541 current_space->local_used += nzk; 2542 current_space->local_remaining -= nzk; 2543 2544 current_space_lvl->array += nzk; 2545 current_space_lvl->local_used += nzk; 2546 current_space_lvl->local_remaining -= nzk; 2547 2548 ui[k + 1] = ui[k] + nzk; 2549 } 2550 2551 PetscCall(ISRestoreIndices(perm, &rip)); 2552 PetscCall(ISRestoreIndices(iperm, &riip)); 2553 PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 2554 PetscCall(PetscFree(ajtmp)); 2555 2556 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2557 PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj)); 2558 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 2559 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 2560 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2561 2562 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2563 2564 /* put together the new matrix in MATSEQSBAIJ format */ 2565 b = (Mat_SeqSBAIJ *)(fact)->data; 2566 b->free_ij = PETSC_TRUE; 2567 PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscScalar), (void **)&b->a)); 2568 b->free_a = PETSC_TRUE; 2569 b->j = uj; 2570 b->i = ui; 2571 b->diag = udiag; 2572 b->free_diag = PETSC_TRUE; 2573 b->ilen = NULL; 2574 b->imax = NULL; 2575 b->row = perm; 2576 b->col = perm; 2577 PetscCall(PetscObjectReference((PetscObject)perm)); 2578 PetscCall(PetscObjectReference((PetscObject)perm)); 2579 b->icol = iperm; 2580 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2581 2582 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 2583 2584 b->maxnz = b->nz = ui[am]; 2585 2586 fact->info.factor_mallocs = reallocs; 2587 fact->info.fill_ratio_given = fill; 2588 if (ai[am] != 0) { 2589 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2590 fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 2591 } else { 2592 fact->info.fill_ratio_needed = 0.0; 2593 } 2594 #if defined(PETSC_USE_INFO) 2595 if (ai[am] != 0) { 2596 PetscReal af = fact->info.fill_ratio_needed; 2597 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2598 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2599 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2600 } else { 2601 PetscCall(PetscInfo(A, "Empty matrix\n")); 2602 } 2603 #endif 2604 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2605 PetscFunctionReturn(PETSC_SUCCESS); 2606 } 2607 2608 #if 0 2609 // unused 2610 static PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2611 { 2612 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2613 Mat_SeqSBAIJ *b; 2614 PetscBool perm_identity, missing; 2615 PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 2616 const PetscInt *rip, *riip; 2617 PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 2618 PetscInt nlnk, *lnk, *lnk_lvl = NULL, d; 2619 PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 2620 PetscReal fill = info->fill, levels = info->levels; 2621 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2622 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 2623 PetscBT lnkbt; 2624 IS iperm; 2625 2626 PetscFunctionBegin; 2627 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2628 PetscCall(MatMissingDiagonal(A, &missing, &d)); 2629 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 2630 PetscCall(ISIdentity(perm, &perm_identity)); 2631 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2632 2633 PetscCall(PetscShmgetAllocateArray(am + 1,sizeof(PetscInt),(void **)&ui)); 2634 PetscCall(PetscMalloc1(am + 1, &udiag)); 2635 ui[0] = 0; 2636 2637 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2638 if (!levels && perm_identity) { 2639 for (i = 0; i < am; i++) { 2640 ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i]; 2641 udiag[i] = ui[i]; 2642 } 2643 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2644 cols = uj; 2645 for (i = 0; i < am; i++) { 2646 aj = a->j + a->diag[i]; 2647 ncols = ui[i + 1] - ui[i]; 2648 for (j = 0; j < ncols; j++) *cols++ = *aj++; 2649 } 2650 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2651 PetscCall(ISGetIndices(iperm, &riip)); 2652 PetscCall(ISGetIndices(perm, &rip)); 2653 2654 /* initialization */ 2655 PetscCall(PetscMalloc1(am + 1, &ajtmp)); 2656 2657 /* jl: linked list for storing indices of the pivot rows 2658 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2659 PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 2660 for (i = 0; i < am; i++) { 2661 jl[i] = am; 2662 il[i] = 0; 2663 } 2664 2665 /* create and initialize a linked list for storing column indices of the active row k */ 2666 nlnk = am + 1; 2667 PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 2668 2669 /* initial FreeSpace size is fill*(ai[am]+1) */ 2670 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space)); 2671 current_space = free_space; 2672 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl)); 2673 current_space_lvl = free_space_lvl; 2674 2675 for (k = 0; k < am; k++) { /* for each active row k */ 2676 /* initialize lnk by the column indices of row rip[k] of A */ 2677 nzk = 0; 2678 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2679 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2680 ncols_upper = 0; 2681 for (j = 0; j < ncols; j++) { 2682 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2683 if (riip[i] >= k) { /* only take upper triangular entry */ 2684 ajtmp[ncols_upper] = i; 2685 ncols_upper++; 2686 } 2687 } 2688 PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 2689 nzk += nlnk; 2690 2691 /* update lnk by computing fill-in for each pivot row to be merged in */ 2692 prow = jl[k]; /* 1st pivot row */ 2693 2694 while (prow < k) { 2695 nextprow = jl[prow]; 2696 2697 /* merge prow into k-th row */ 2698 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2699 jmax = ui[prow + 1]; 2700 ncols = jmax - jmin; 2701 i = jmin - ui[prow]; 2702 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2703 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2704 j = *(uj - 1); 2705 PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 2706 nzk += nlnk; 2707 2708 /* update il and jl for prow */ 2709 if (jmin < jmax) { 2710 il[prow] = jmin; 2711 j = *cols; 2712 jl[prow] = jl[j]; 2713 jl[j] = prow; 2714 } 2715 prow = nextprow; 2716 } 2717 2718 /* if free space is not available, make more free space */ 2719 if (current_space->local_remaining < nzk) { 2720 i = am - k + 1; /* num of unfactored rows */ 2721 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2722 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2723 PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 2724 reallocs++; 2725 } 2726 2727 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2728 PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 2729 PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 2730 2731 /* add the k-th row into il and jl */ 2732 if (nzk > 1) { 2733 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2734 jl[k] = jl[i]; 2735 jl[i] = k; 2736 il[k] = ui[k] + 1; 2737 } 2738 uj_ptr[k] = current_space->array; 2739 uj_lvl_ptr[k] = current_space_lvl->array; 2740 2741 current_space->array += nzk; 2742 current_space->local_used += nzk; 2743 current_space->local_remaining -= nzk; 2744 2745 current_space_lvl->array += nzk; 2746 current_space_lvl->local_used += nzk; 2747 current_space_lvl->local_remaining -= nzk; 2748 2749 ui[k + 1] = ui[k] + nzk; 2750 } 2751 2752 #if defined(PETSC_USE_INFO) 2753 if (ai[am] != 0) { 2754 PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]); 2755 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2756 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2757 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2758 } else { 2759 PetscCall(PetscInfo(A, "Empty matrix\n")); 2760 } 2761 #endif 2762 2763 PetscCall(ISRestoreIndices(perm, &rip)); 2764 PetscCall(ISRestoreIndices(iperm, &riip)); 2765 PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 2766 PetscCall(PetscFree(ajtmp)); 2767 2768 /* destroy list of free space and other temporary array(s) */ 2769 PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscInt),(void **)&uj)); 2770 PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 2771 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 2772 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2773 2774 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2775 2776 /* put together the new matrix in MATSEQSBAIJ format */ 2777 2778 b = (Mat_SeqSBAIJ *)fact->data; 2779 b->free_ij = PETSC_TRUE; 2780 PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscScalar),(void **)&b->a)); 2781 b->free_a = PETSC_TRUE; 2782 b->j = uj; 2783 b->i = ui; 2784 b->diag = udiag; 2785 b->free_diag = PETSC_TRUE; 2786 b->ilen = NULL; 2787 b->imax = NULL; 2788 b->row = perm; 2789 b->col = perm; 2790 2791 PetscCall(PetscObjectReference((PetscObject)perm)); 2792 PetscCall(PetscObjectReference((PetscObject)perm)); 2793 2794 b->icol = iperm; 2795 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2796 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 2797 b->maxnz = b->nz = ui[am]; 2798 2799 fact->info.factor_mallocs = reallocs; 2800 fact->info.fill_ratio_given = fill; 2801 if (ai[am] != 0) { 2802 fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]); 2803 } else { 2804 fact->info.fill_ratio_needed = 0.0; 2805 } 2806 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2807 PetscFunctionReturn(PETSC_SUCCESS); 2808 } 2809 #endif 2810 2811 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2812 { 2813 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2814 Mat_SeqSBAIJ *b; 2815 PetscBool perm_identity, missing; 2816 PetscReal fill = info->fill; 2817 const PetscInt *rip, *riip; 2818 PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 2819 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 2820 PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag; 2821 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2822 PetscBT lnkbt; 2823 IS iperm; 2824 2825 PetscFunctionBegin; 2826 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2827 PetscCall(MatMissingDiagonal(A, &missing, &i)); 2828 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 2829 2830 /* check whether perm is the identity mapping */ 2831 PetscCall(ISIdentity(perm, &perm_identity)); 2832 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2833 PetscCall(ISGetIndices(iperm, &riip)); 2834 PetscCall(ISGetIndices(perm, &rip)); 2835 2836 /* initialization */ 2837 PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui)); 2838 PetscCall(PetscMalloc1(am + 1, &udiag)); 2839 ui[0] = 0; 2840 2841 /* jl: linked list for storing indices of the pivot rows 2842 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2843 PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 2844 for (i = 0; i < am; i++) { 2845 jl[i] = am; 2846 il[i] = 0; 2847 } 2848 2849 /* create and initialize a linked list for storing column indices of the active row k */ 2850 nlnk = am + 1; 2851 PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 2852 2853 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2854 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 2855 current_space = free_space; 2856 2857 for (k = 0; k < am; k++) { /* for each active row k */ 2858 /* initialize lnk by the column indices of row rip[k] of A */ 2859 nzk = 0; 2860 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2861 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2862 ncols_upper = 0; 2863 for (j = 0; j < ncols; j++) { 2864 i = riip[*(aj + ai[rip[k]] + j)]; 2865 if (i >= k) { /* only take upper triangular entry */ 2866 cols[ncols_upper] = i; 2867 ncols_upper++; 2868 } 2869 } 2870 PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 2871 nzk += nlnk; 2872 2873 /* update lnk by computing fill-in for each pivot row to be merged in */ 2874 prow = jl[k]; /* 1st pivot row */ 2875 2876 while (prow < k) { 2877 nextprow = jl[prow]; 2878 /* merge prow into k-th row */ 2879 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2880 jmax = ui[prow + 1]; 2881 ncols = jmax - jmin; 2882 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2883 PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 2884 nzk += nlnk; 2885 2886 /* update il and jl for prow */ 2887 if (jmin < jmax) { 2888 il[prow] = jmin; 2889 j = *uj_ptr; 2890 jl[prow] = jl[j]; 2891 jl[j] = prow; 2892 } 2893 prow = nextprow; 2894 } 2895 2896 /* if free space is not available, make more free space */ 2897 if (current_space->local_remaining < nzk) { 2898 i = am - k + 1; /* num of unfactored rows */ 2899 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2900 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2901 reallocs++; 2902 } 2903 2904 /* copy data into free space, then initialize lnk */ 2905 PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 2906 2907 /* add the k-th row into il and jl */ 2908 if (nzk > 1) { 2909 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2910 jl[k] = jl[i]; 2911 jl[i] = k; 2912 il[k] = ui[k] + 1; 2913 } 2914 ui_ptr[k] = current_space->array; 2915 2916 current_space->array += nzk; 2917 current_space->local_used += nzk; 2918 current_space->local_remaining -= nzk; 2919 2920 ui[k + 1] = ui[k] + nzk; 2921 } 2922 2923 PetscCall(ISRestoreIndices(perm, &rip)); 2924 PetscCall(ISRestoreIndices(iperm, &riip)); 2925 PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 2926 2927 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2928 PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj)); 2929 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 2930 PetscCall(PetscLLDestroy(lnk, lnkbt)); 2931 2932 /* put together the new matrix in MATSEQSBAIJ format */ 2933 b = (Mat_SeqSBAIJ *)fact->data; 2934 b->free_ij = PETSC_TRUE; 2935 PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscScalar), (void **)&b->a)); 2936 b->free_a = PETSC_TRUE; 2937 b->j = uj; 2938 b->i = ui; 2939 b->diag = udiag; 2940 b->free_diag = PETSC_TRUE; 2941 b->ilen = NULL; 2942 b->imax = NULL; 2943 b->row = perm; 2944 b->col = perm; 2945 2946 PetscCall(PetscObjectReference((PetscObject)perm)); 2947 PetscCall(PetscObjectReference((PetscObject)perm)); 2948 2949 b->icol = iperm; 2950 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2951 2952 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 2953 2954 b->maxnz = b->nz = ui[am]; 2955 2956 fact->info.factor_mallocs = reallocs; 2957 fact->info.fill_ratio_given = fill; 2958 if (ai[am] != 0) { 2959 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2960 fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 2961 } else { 2962 fact->info.fill_ratio_needed = 0.0; 2963 } 2964 #if defined(PETSC_USE_INFO) 2965 if (ai[am] != 0) { 2966 PetscReal af = fact->info.fill_ratio_needed; 2967 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2968 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2969 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2970 } else { 2971 PetscCall(PetscInfo(A, "Empty matrix\n")); 2972 } 2973 #endif 2974 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2975 PetscFunctionReturn(PETSC_SUCCESS); 2976 } 2977 2978 #if 0 2979 // unused 2980 static PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2981 { 2982 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2983 Mat_SeqSBAIJ *b; 2984 PetscBool perm_identity, missing; 2985 PetscReal fill = info->fill; 2986 const PetscInt *rip, *riip; 2987 PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 2988 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 2989 PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr; 2990 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2991 PetscBT lnkbt; 2992 IS iperm; 2993 2994 PetscFunctionBegin; 2995 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2996 PetscCall(MatMissingDiagonal(A, &missing, &i)); 2997 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 2998 2999 /* check whether perm is the identity mapping */ 3000 PetscCall(ISIdentity(perm, &perm_identity)); 3001 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 3002 PetscCall(ISGetIndices(iperm, &riip)); 3003 PetscCall(ISGetIndices(perm, &rip)); 3004 3005 /* initialization */ 3006 PetscCall(PetscShmgetAllocateArray(am + 1,sizeof(PetscInt),(void **)&ui)); 3007 ui[0] = 0; 3008 3009 /* jl: linked list for storing indices of the pivot rows 3010 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 3011 PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 3012 for (i = 0; i < am; i++) { 3013 jl[i] = am; 3014 il[i] = 0; 3015 } 3016 3017 /* create and initialize a linked list for storing column indices of the active row k */ 3018 nlnk = am + 1; 3019 PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 3020 3021 /* initial FreeSpace size is fill*(ai[am]+1) */ 3022 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space)); 3023 current_space = free_space; 3024 3025 for (k = 0; k < am; k++) { /* for each active row k */ 3026 /* initialize lnk by the column indices of row rip[k] of A */ 3027 nzk = 0; 3028 ncols = ai[rip[k] + 1] - ai[rip[k]]; 3029 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 3030 ncols_upper = 0; 3031 for (j = 0; j < ncols; j++) { 3032 i = riip[*(aj + ai[rip[k]] + j)]; 3033 if (i >= k) { /* only take upper triangular entry */ 3034 cols[ncols_upper] = i; 3035 ncols_upper++; 3036 } 3037 } 3038 PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 3039 nzk += nlnk; 3040 3041 /* update lnk by computing fill-in for each pivot row to be merged in */ 3042 prow = jl[k]; /* 1st pivot row */ 3043 3044 while (prow < k) { 3045 nextprow = jl[prow]; 3046 /* merge prow into k-th row */ 3047 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3048 jmax = ui[prow + 1]; 3049 ncols = jmax - jmin; 3050 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3051 PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 3052 nzk += nlnk; 3053 3054 /* update il and jl for prow */ 3055 if (jmin < jmax) { 3056 il[prow] = jmin; 3057 j = *uj_ptr; 3058 jl[prow] = jl[j]; 3059 jl[j] = prow; 3060 } 3061 prow = nextprow; 3062 } 3063 3064 /* if free space is not available, make more free space */ 3065 if (current_space->local_remaining < nzk) { 3066 i = am - k + 1; /* num of unfactored rows */ 3067 i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3068 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 3069 reallocs++; 3070 } 3071 3072 /* copy data into free space, then initialize lnk */ 3073 PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 3074 3075 /* add the k-th row into il and jl */ 3076 if (nzk - 1 > 0) { 3077 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3078 jl[k] = jl[i]; 3079 jl[i] = k; 3080 il[k] = ui[k] + 1; 3081 } 3082 ui_ptr[k] = current_space->array; 3083 3084 current_space->array += nzk; 3085 current_space->local_used += nzk; 3086 current_space->local_remaining -= nzk; 3087 3088 ui[k + 1] = ui[k] + nzk; 3089 } 3090 3091 #if defined(PETSC_USE_INFO) 3092 if (ai[am] != 0) { 3093 PetscReal af = (PetscReal)ui[am] / (PetscReal)ai[am]; 3094 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 3095 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 3096 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 3097 } else { 3098 PetscCall(PetscInfo(A, "Empty matrix\n")); 3099 } 3100 #endif 3101 3102 PetscCall(ISRestoreIndices(perm, &rip)); 3103 PetscCall(ISRestoreIndices(iperm, &riip)); 3104 PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 3105 3106 /* destroy list of free space and other temporary array(s) */ 3107 PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscInt),(void **)&uj)); 3108 PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 3109 PetscCall(PetscLLDestroy(lnk, lnkbt)); 3110 3111 /* put together the new matrix in MATSEQSBAIJ format */ 3112 b = (Mat_SeqSBAIJ *)fact->data; 3113 b->free_ij = PETSC_TRUE; 3114 PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscScalar),(void **)&b->a)); 3115 b->free_a = PETSC_TRUE; 3116 b->j = uj; 3117 b->i = ui; 3118 b->diag = NULL; 3119 b->ilen = NULL; 3120 b->imax = NULL; 3121 b->row = perm; 3122 b->col = perm; 3123 3124 PetscCall(PetscObjectReference((PetscObject)perm)); 3125 PetscCall(PetscObjectReference((PetscObject)perm)); 3126 3127 b->icol = iperm; 3128 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3129 3130 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 3131 b->maxnz = b->nz = ui[am]; 3132 3133 fact->info.factor_mallocs = reallocs; 3134 fact->info.fill_ratio_given = fill; 3135 if (ai[am] != 0) { 3136 fact->info.fill_ratio_needed = (PetscReal)ui[am] / (PetscReal)ai[am]; 3137 } else { 3138 fact->info.fill_ratio_needed = 0.0; 3139 } 3140 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3141 PetscFunctionReturn(PETSC_SUCCESS); 3142 } 3143 #endif 3144 3145 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx) 3146 { 3147 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3148 PetscInt n = A->rmap->n; 3149 const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi; 3150 PetscScalar *x, sum; 3151 const PetscScalar *b; 3152 const MatScalar *aa = a->a, *v; 3153 PetscInt i, nz; 3154 3155 PetscFunctionBegin; 3156 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 3157 3158 PetscCall(VecGetArrayRead(bb, &b)); 3159 PetscCall(VecGetArrayWrite(xx, &x)); 3160 3161 /* forward solve the lower triangular */ 3162 x[0] = b[0]; 3163 v = aa; 3164 vi = aj; 3165 for (i = 1; i < n; i++) { 3166 nz = ai[i + 1] - ai[i]; 3167 sum = b[i]; 3168 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 3169 v += nz; 3170 vi += nz; 3171 x[i] = sum; 3172 } 3173 3174 /* backward solve the upper triangular */ 3175 for (i = n - 1; i >= 0; i--) { 3176 v = aa + adiag[i + 1] + 1; 3177 vi = aj + adiag[i + 1] + 1; 3178 nz = adiag[i] - adiag[i + 1] - 1; 3179 sum = x[i]; 3180 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 3181 x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3182 } 3183 3184 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 3185 PetscCall(VecRestoreArrayRead(bb, &b)); 3186 PetscCall(VecRestoreArrayWrite(xx, &x)); 3187 PetscFunctionReturn(PETSC_SUCCESS); 3188 } 3189 3190 PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx) 3191 { 3192 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3193 IS iscol = a->col, isrow = a->row; 3194 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz; 3195 const PetscInt *rout, *cout, *r, *c; 3196 PetscScalar *x, *tmp, sum; 3197 const PetscScalar *b; 3198 const MatScalar *aa = a->a, *v; 3199 3200 PetscFunctionBegin; 3201 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 3202 3203 PetscCall(VecGetArrayRead(bb, &b)); 3204 PetscCall(VecGetArrayWrite(xx, &x)); 3205 tmp = a->solve_work; 3206 3207 PetscCall(ISGetIndices(isrow, &rout)); 3208 r = rout; 3209 PetscCall(ISGetIndices(iscol, &cout)); 3210 c = cout; 3211 3212 /* forward solve the lower triangular */ 3213 tmp[0] = b[r[0]]; 3214 v = aa; 3215 vi = aj; 3216 for (i = 1; i < n; i++) { 3217 nz = ai[i + 1] - ai[i]; 3218 sum = b[r[i]]; 3219 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 3220 tmp[i] = sum; 3221 v += nz; 3222 vi += nz; 3223 } 3224 3225 /* backward solve the upper triangular */ 3226 for (i = n - 1; i >= 0; i--) { 3227 v = aa + adiag[i + 1] + 1; 3228 vi = aj + adiag[i + 1] + 1; 3229 nz = adiag[i] - adiag[i + 1] - 1; 3230 sum = tmp[i]; 3231 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 3232 x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 3233 } 3234 3235 PetscCall(ISRestoreIndices(isrow, &rout)); 3236 PetscCall(ISRestoreIndices(iscol, &cout)); 3237 PetscCall(VecRestoreArrayRead(bb, &b)); 3238 PetscCall(VecRestoreArrayWrite(xx, &x)); 3239 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 3240 PetscFunctionReturn(PETSC_SUCCESS); 3241 } 3242 3243 #if 0 3244 // unused 3245 /* 3246 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3247 */ 3248 static PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact) 3249 { 3250 Mat B = *fact; 3251 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 3252 IS isicol; 3253 const PetscInt *r, *ic; 3254 PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag; 3255 PetscInt *bi, *bj, *bdiag, *bdiag_rev; 3256 PetscInt row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au; 3257 PetscInt nlnk, *lnk; 3258 PetscBT lnkbt; 3259 PetscBool row_identity, icol_identity; 3260 MatScalar *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp; 3261 const PetscInt *ics; 3262 PetscInt j, nz, *pj, *bjtmp, k, ncut, *jtmp; 3263 PetscReal dt = info->dt, shift = info->shiftamount; 3264 PetscInt dtcount = (PetscInt)info->dtcount, nnz_max; 3265 PetscBool missing; 3266 3267 PetscFunctionBegin; 3268 if (dt == (PetscReal)PETSC_DEFAULT) dt = 0.005; 3269 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax); 3270 3271 /* symbolic factorization, can be reused */ 3272 PetscCall(MatMissingDiagonal(A, &missing, &i)); 3273 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 3274 adiag = a->diag; 3275 3276 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 3277 3278 /* bdiag is location of diagonal in factor */ 3279 PetscCall(PetscMalloc1(n + 1, &bdiag)); /* becomes b->diag */ 3280 PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */ 3281 3282 /* allocate row pointers bi */ 3283 PetscCall(PetscShmgetAllocateArray(2 * n + 2,sizeof(PetscInt),(void **)&bi)); 3284 3285 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3286 if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */ 3287 nnz_max = ai[n] + 2 * n * dtcount + 2; 3288 3289 PetscCall(PetscShmgetAllocateArray(nnz_max + 1,sizeof(PetscInt),(void **)&bj)); 3290 PetscCall(PetscShmgetAllocateArray(nnz_max + 1,sizeof(PetscScalar),(void **)&ba)); 3291 3292 /* put together the new matrix */ 3293 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 3294 b = (Mat_SeqAIJ *)B->data; 3295 b->free_a = PETSC_TRUE; 3296 b->free_ij = PETSC_TRUE; 3297 b->a = ba; 3298 b->j = bj; 3299 b->i = bi; 3300 b->diag = bdiag; 3301 b->ilen = NULL; 3302 b->imax = NULL; 3303 b->row = isrow; 3304 b->col = iscol; 3305 PetscCall(PetscObjectReference((PetscObject)isrow)); 3306 PetscCall(PetscObjectReference((PetscObject)iscol)); 3307 b->icol = isicol; 3308 3309 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 3310 b->maxnz = nnz_max; 3311 3312 B->factortype = MAT_FACTOR_ILUDT; 3313 B->info.factor_mallocs = 0; 3314 B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]); 3315 /* end of symbolic factorization */ 3316 3317 PetscCall(ISGetIndices(isrow, &r)); 3318 PetscCall(ISGetIndices(isicol, &ic)); 3319 ics = ic; 3320 3321 /* linked list for storing column indices of the active row */ 3322 nlnk = n + 1; 3323 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 3324 3325 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3326 PetscCall(PetscMalloc2(n, &im, n, &jtmp)); 3327 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3328 PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp)); 3329 PetscCall(PetscArrayzero(rtmp, n)); 3330 3331 bi[0] = 0; 3332 bdiag[0] = nnz_max - 1; /* location of diag[0] in factor B */ 3333 bdiag_rev[n] = bdiag[0]; 3334 bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */ 3335 for (i = 0; i < n; i++) { 3336 /* copy initial fill into linked list */ 3337 nzi = ai[r[i] + 1] - ai[r[i]]; 3338 PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 3339 nzi_al = adiag[r[i]] - ai[r[i]]; 3340 nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1; 3341 ajtmp = aj + ai[r[i]]; 3342 PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 3343 3344 /* load in initial (unfactored row) */ 3345 aatmp = a->a + ai[r[i]]; 3346 for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++; 3347 3348 /* add pivot rows into linked list */ 3349 row = lnk[n]; 3350 while (row < i) { 3351 nzi_bl = bi[row + 1] - bi[row] + 1; 3352 bjtmp = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */ 3353 PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im)); 3354 nzi += nlnk; 3355 row = lnk[row]; 3356 } 3357 3358 /* copy data from lnk into jtmp, then initialize lnk */ 3359 PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt)); 3360 3361 /* numerical factorization */ 3362 bjtmp = jtmp; 3363 row = *bjtmp++; /* 1st pivot row */ 3364 while (row < i) { 3365 pc = rtmp + row; 3366 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3367 multiplier = (*pc) * (*pv); 3368 *pc = multiplier; 3369 if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 3370 pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */ 3371 pv = ba + bdiag[row + 1] + 1; 3372 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3373 for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3374 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 3375 } 3376 row = *bjtmp++; 3377 } 3378 3379 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3380 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3381 nzi_bl = 0; 3382 j = 0; 3383 while (jtmp[j] < i) { /* Note: jtmp is sorted */ 3384 vtmp[j] = rtmp[jtmp[j]]; 3385 rtmp[jtmp[j]] = 0.0; 3386 nzi_bl++; 3387 j++; 3388 } 3389 nzi_bu = nzi - nzi_bl - 1; 3390 while (j < nzi) { 3391 vtmp[j] = rtmp[jtmp[j]]; 3392 rtmp[jtmp[j]] = 0.0; 3393 j++; 3394 } 3395 3396 bjtmp = bj + bi[i]; 3397 batmp = ba + bi[i]; 3398 /* apply level dropping rule to L part */ 3399 ncut = nzi_al + dtcount; 3400 if (ncut < nzi_bl) { 3401 PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp)); 3402 PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp)); 3403 } else { 3404 ncut = nzi_bl; 3405 } 3406 for (j = 0; j < ncut; j++) { 3407 bjtmp[j] = jtmp[j]; 3408 batmp[j] = vtmp[j]; 3409 } 3410 bi[i + 1] = bi[i] + ncut; 3411 nzi = ncut + 1; 3412 3413 /* apply level dropping rule to U part */ 3414 ncut = nzi_au + dtcount; 3415 if (ncut < nzi_bu) { 3416 PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1)); 3417 PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1)); 3418 } else { 3419 ncut = nzi_bu; 3420 } 3421 nzi += ncut; 3422 3423 /* mark bdiagonal */ 3424 bdiag[i + 1] = bdiag[i] - (ncut + 1); 3425 bdiag_rev[n - i - 1] = bdiag[i + 1]; 3426 bi[2 * n - i] = bi[2 * n - i + 1] - (ncut + 1); 3427 bjtmp = bj + bdiag[i]; 3428 batmp = ba + bdiag[i]; 3429 *bjtmp = i; 3430 *batmp = diag_tmp; /* rtmp[i]; */ 3431 if (*batmp == 0.0) *batmp = dt + shift; 3432 *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */ 3433 3434 bjtmp = bj + bdiag[i + 1] + 1; 3435 batmp = ba + bdiag[i + 1] + 1; 3436 for (k = 0; k < ncut; k++) { 3437 bjtmp[k] = jtmp[nzi_bl + 1 + k]; 3438 batmp[k] = vtmp[nzi_bl + 1 + k]; 3439 } 3440 3441 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3442 } /* for (i=0; i<n; i++) */ 3443 PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]); 3444 3445 PetscCall(ISRestoreIndices(isrow, &r)); 3446 PetscCall(ISRestoreIndices(isicol, &ic)); 3447 3448 PetscCall(PetscLLDestroy(lnk, lnkbt)); 3449 PetscCall(PetscFree2(im, jtmp)); 3450 PetscCall(PetscFree2(rtmp, vtmp)); 3451 PetscCall(PetscFree(bdiag_rev)); 3452 3453 PetscCall(PetscLogFlops(B->cmap->n)); 3454 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3455 3456 PetscCall(ISIdentity(isrow, &row_identity)); 3457 PetscCall(ISIdentity(isicol, &icol_identity)); 3458 if (row_identity && icol_identity) { 3459 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3460 } else { 3461 B->ops->solve = MatSolve_SeqAIJ; 3462 } 3463 3464 B->ops->solveadd = NULL; 3465 B->ops->solvetranspose = NULL; 3466 B->ops->solvetransposeadd = NULL; 3467 B->ops->matsolve = NULL; 3468 B->assembled = PETSC_TRUE; 3469 B->preallocated = PETSC_TRUE; 3470 PetscFunctionReturn(PETSC_SUCCESS); 3471 } 3472 3473 /* a wrapper of MatILUDTFactor_SeqAIJ() */ 3474 /* 3475 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3476 */ 3477 3478 static PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) 3479 { 3480 PetscFunctionBegin; 3481 PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact)); 3482 PetscFunctionReturn(PETSC_SUCCESS); 3483 } 3484 3485 /* 3486 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3487 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3488 */ 3489 /* 3490 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3491 */ 3492 3493 static PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info) 3494 { 3495 Mat C = fact; 3496 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 3497 IS isrow = b->row, isicol = b->icol; 3498 const PetscInt *r, *ic, *ics; 3499 PetscInt i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 3500 PetscInt *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj; 3501 MatScalar *rtmp, *pc, multiplier, *v, *pv, *aa = a->a; 3502 PetscReal dt = info->dt, shift = info->shiftamount; 3503 PetscBool row_identity, col_identity; 3504 3505 PetscFunctionBegin; 3506 PetscCall(ISGetIndices(isrow, &r)); 3507 PetscCall(ISGetIndices(isicol, &ic)); 3508 PetscCall(PetscMalloc1(n + 1, &rtmp)); 3509 ics = ic; 3510 3511 for (i = 0; i < n; i++) { 3512 /* initialize rtmp array */ 3513 nzl = bi[i + 1] - bi[i]; /* num of nonzeros in L(i,:) */ 3514 bjtmp = bj + bi[i]; 3515 for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0; 3516 rtmp[i] = 0.0; 3517 nzu = bdiag[i] - bdiag[i + 1]; /* num of nonzeros in U(i,:) */ 3518 bjtmp = bj + bdiag[i + 1] + 1; 3519 for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0; 3520 3521 /* load in initial unfactored row of A */ 3522 nz = ai[r[i] + 1] - ai[r[i]]; 3523 ajtmp = aj + ai[r[i]]; 3524 v = aa + ai[r[i]]; 3525 for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j]; 3526 3527 /* numerical factorization */ 3528 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3529 nzl = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */ 3530 k = 0; 3531 while (k < nzl) { 3532 row = *bjtmp++; 3533 pc = rtmp + row; 3534 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3535 multiplier = (*pc) * (*pv); 3536 *pc = multiplier; 3537 if (PetscAbsScalar(multiplier) > dt) { 3538 pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */ 3539 pv = b->a + bdiag[row + 1] + 1; 3540 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3541 for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3542 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 3543 } 3544 k++; 3545 } 3546 3547 /* finished row so stick it into b->a */ 3548 /* L-part */ 3549 pv = b->a + bi[i]; 3550 pj = bj + bi[i]; 3551 nzl = bi[i + 1] - bi[i]; 3552 for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]]; 3553 3554 /* diagonal: invert diagonal entries for simpler triangular solves */ 3555 if (rtmp[i] == 0.0) rtmp[i] = dt + shift; 3556 b->a[bdiag[i]] = 1.0 / rtmp[i]; 3557 3558 /* U-part */ 3559 pv = b->a + bdiag[i + 1] + 1; 3560 pj = bj + bdiag[i + 1] + 1; 3561 nzu = bdiag[i] - bdiag[i + 1] - 1; 3562 for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]]; 3563 } 3564 3565 PetscCall(PetscFree(rtmp)); 3566 PetscCall(ISRestoreIndices(isicol, &ic)); 3567 PetscCall(ISRestoreIndices(isrow, &r)); 3568 3569 PetscCall(ISIdentity(isrow, &row_identity)); 3570 PetscCall(ISIdentity(isicol, &col_identity)); 3571 if (row_identity && col_identity) { 3572 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3573 } else { 3574 C->ops->solve = MatSolve_SeqAIJ; 3575 } 3576 C->ops->solveadd = NULL; 3577 C->ops->solvetranspose = NULL; 3578 C->ops->solvetransposeadd = NULL; 3579 C->ops->matsolve = NULL; 3580 C->assembled = PETSC_TRUE; 3581 C->preallocated = PETSC_TRUE; 3582 3583 PetscCall(PetscLogFlops(C->cmap->n)); 3584 PetscFunctionReturn(PETSC_SUCCESS); 3585 } 3586 #endif 3587