1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/mat/impls/sbaij/seq/sbaij.h> 3 #include <petscbt.h> 4 #include <../src/mat/utils/freespace.h> 5 6 /* 7 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 8 9 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 10 */ 11 #if 0 12 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol) 13 { 14 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->data; 15 PetscInt i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order; 16 const PetscInt *ai = a->i, *aj = a->j; 17 const PetscScalar *aa = a->a; 18 PetscBool *done; 19 PetscReal best, past = 0, future; 20 21 PetscFunctionBegin; 22 /* pick initial row */ 23 best = -1; 24 for (i = 0; i < n; i++) { 25 future = 0.0; 26 for (j = ai[i]; j < ai[i + 1]; j++) { 27 if (aj[j] != i) future += PetscAbsScalar(aa[j]); 28 else past = PetscAbsScalar(aa[j]); 29 } 30 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 31 if (past / future > best) { 32 best = past / future; 33 current = i; 34 } 35 } 36 37 PetscCall(PetscMalloc1(n, &done)); 38 PetscCall(PetscArrayzero(done, n)); 39 PetscCall(PetscMalloc1(n, &order)); 40 order[0] = current; 41 for (i = 0; i < n - 1; i++) { 42 done[current] = PETSC_TRUE; 43 best = -1; 44 /* loop over all neighbors of current pivot */ 45 for (j = ai[current]; j < ai[current + 1]; j++) { 46 jj = aj[j]; 47 if (done[jj]) continue; 48 /* loop over columns of potential next row computing weights for below and above diagonal */ 49 past = future = 0.0; 50 for (k = ai[jj]; k < ai[jj + 1]; k++) { 51 kk = aj[k]; 52 if (done[kk]) past += PetscAbsScalar(aa[k]); 53 else if (kk != jj) future += PetscAbsScalar(aa[k]); 54 } 55 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 56 if (past / future > best) { 57 best = past / future; 58 newcurrent = jj; 59 } 60 } 61 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 62 best = -1; 63 for (k = 0; k < n; k++) { 64 if (done[k]) continue; 65 future = 0.0; 66 past = 0.0; 67 for (j = ai[k]; j < ai[k + 1]; j++) { 68 kk = aj[j]; 69 if (done[kk]) past += PetscAbsScalar(aa[j]); 70 else if (kk != k) future += PetscAbsScalar(aa[j]); 71 } 72 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 73 if (past / future > best) { 74 best = past / future; 75 newcurrent = k; 76 } 77 } 78 } 79 PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current"); 80 current = newcurrent; 81 order[i + 1] = current; 82 } 83 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow)); 84 *icol = *irow; 85 PetscCall(PetscObjectReference((PetscObject)*irow)); 86 PetscCall(PetscFree(done)); 87 PetscCall(PetscFree(order)); 88 PetscFunctionReturn(PETSC_SUCCESS); 89 } 90 #endif 91 92 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type) 93 { 94 PetscFunctionBegin; 95 *type = MATSOLVERPETSC; 96 PetscFunctionReturn(PETSC_SUCCESS); 97 } 98 99 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B) 100 { 101 PetscInt n = A->rmap->n; 102 103 PetscFunctionBegin; 104 #if defined(PETSC_USE_COMPLEX) 105 if ((ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 106 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n")); 107 *B = NULL; 108 PetscFunctionReturn(PETSC_SUCCESS); 109 } 110 #endif 111 112 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 113 PetscCall(MatSetSizes(*B, n, n, n, n)); 114 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 115 PetscCall(MatSetType(*B, MATSEQAIJ)); 116 117 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 118 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 119 120 PetscCall(MatSetBlockSizesFromMats(*B, A, A)); 121 PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU])); 122 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU])); 123 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT])); 124 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 125 PetscCall(MatSetType(*B, MATSEQSBAIJ)); 126 PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL)); 127 128 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 129 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 130 PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY])); 131 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC])); 132 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported"); 133 (*B)->factortype = ftype; 134 135 PetscCall(PetscFree((*B)->solvertype)); 136 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype)); 137 (*B)->canuseordering = PETSC_TRUE; 138 PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc)); 139 PetscFunctionReturn(PETSC_SUCCESS); 140 } 141 142 #if 0 143 // currently unused 144 static PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 145 { 146 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 147 IS isicol; 148 const PetscInt *r, *ic; 149 PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j; 150 PetscInt *bi, *bj, *ajtmp; 151 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 152 PetscReal f; 153 PetscInt nlnk, *lnk, k, **bi_ptr; 154 PetscFreeSpaceList free_space = NULL, current_space = NULL; 155 PetscBT lnkbt; 156 PetscBool missing; 157 158 PetscFunctionBegin; 159 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 160 PetscCall(MatMissingDiagonal(A, &missing, &i)); 161 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 162 163 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 164 PetscCall(ISGetIndices(isrow, &r)); 165 PetscCall(ISGetIndices(isicol, &ic)); 166 167 /* get new row pointers */ 168 PetscCall(PetscShmgetAllocateArray(n + 1,sizeof(PetscInt),(void **)&bi)); 169 bi[0] = 0; 170 171 /* bdiag is location of diagonal in factor */ 172 PetscCall(PetscMalloc1(n + 1, &bdiag)); 173 bdiag[0] = 0; 174 175 /* linked list for storing column indices of the active row */ 176 nlnk = n + 1; 177 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 178 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 179 180 /* initial FreeSpace size is f*(ai[n]+1) */ 181 f = info->fill; 182 if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 183 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 184 current_space = free_space; 185 186 for (i = 0; i < n; i++) { 187 /* copy previous fill into linked list */ 188 nzi = 0; 189 nnz = ai[r[i] + 1] - ai[r[i]]; 190 ajtmp = aj + ai[r[i]]; 191 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 192 nzi += nlnk; 193 194 /* add pivot rows into linked list */ 195 row = lnk[n]; 196 while (row < i) { 197 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 198 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 199 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 200 nzi += nlnk; 201 row = lnk[row]; 202 } 203 bi[i + 1] = bi[i] + nzi; 204 im[i] = nzi; 205 206 /* mark bdiag */ 207 nzbd = 0; 208 nnz = nzi; 209 k = lnk[n]; 210 while (nnz-- && k < i) { 211 nzbd++; 212 k = lnk[k]; 213 } 214 bdiag[i] = bi[i] + nzbd; 215 216 /* if free space is not available, make more free space */ 217 if (current_space->local_remaining < nzi) { 218 nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */ 219 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 220 reallocs++; 221 } 222 223 /* copy data into free space, then initialize lnk */ 224 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 225 226 bi_ptr[i] = current_space->array; 227 current_space->array += nzi; 228 current_space->local_used += nzi; 229 current_space->local_remaining -= nzi; 230 } 231 #if defined(PETSC_USE_INFO) 232 if (ai[n] != 0) { 233 PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 234 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 235 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 236 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 237 PetscCall(PetscInfo(A, "for best performance.\n")); 238 } else { 239 PetscCall(PetscInfo(A, "Empty matrix\n")); 240 } 241 #endif 242 243 PetscCall(ISRestoreIndices(isrow, &r)); 244 PetscCall(ISRestoreIndices(isicol, &ic)); 245 246 /* destroy list of free space and other temporary array(s) */ 247 PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscInt),(void **)&bj)); 248 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 249 PetscCall(PetscLLDestroy(lnk, lnkbt)); 250 PetscCall(PetscFree2(bi_ptr, im)); 251 252 /* put together the new matrix */ 253 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 254 b = (Mat_SeqAIJ *)B->data; 255 b->free_ij = PETSC_TRUE; 256 PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscScalar),(void **)&b->a)); 257 b->free_a = PETSC_TRUE; 258 b->j = bj; 259 b->i = bi; 260 b->diag = bdiag; 261 b->ilen = NULL; 262 b->imax = NULL; 263 b->row = isrow; 264 b->col = iscol; 265 PetscCall(PetscObjectReference((PetscObject)isrow)); 266 PetscCall(PetscObjectReference((PetscObject)iscol)); 267 b->icol = isicol; 268 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 269 270 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 271 b->maxnz = b->nz = bi[n]; 272 273 B->factortype = MAT_FACTOR_LU; 274 B->info.factor_mallocs = reallocs; 275 B->info.fill_ratio_given = f; 276 277 if (ai[n]) { 278 B->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 279 } else { 280 B->info.fill_ratio_needed = 0.0; 281 } 282 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 283 if (a->inode.size_csr) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 284 PetscFunctionReturn(PETSC_SUCCESS); 285 } 286 #endif 287 288 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 289 { 290 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 291 IS isicol; 292 const PetscInt *r, *ic, *ai = a->i, *aj = a->j, *ajtmp; 293 PetscInt i, n = A->rmap->n; 294 PetscInt *bi, *bj; 295 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 296 PetscReal f; 297 PetscInt nlnk, *lnk, k, **bi_ptr; 298 PetscFreeSpaceList free_space = NULL, current_space = NULL; 299 PetscBT lnkbt; 300 PetscBool missing; 301 302 PetscFunctionBegin; 303 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 304 PetscCall(MatMissingDiagonal(A, &missing, &i)); 305 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 306 307 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 308 PetscCall(ISGetIndices(isrow, &r)); 309 PetscCall(ISGetIndices(isicol, &ic)); 310 311 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 312 PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi)); 313 PetscCall(PetscMalloc1(n + 1, &bdiag)); 314 bi[0] = bdiag[0] = 0; 315 316 /* linked list for storing column indices of the active row */ 317 nlnk = n + 1; 318 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 319 320 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 321 322 /* initial FreeSpace size is f*(ai[n]+1) */ 323 f = info->fill; 324 if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 325 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 326 current_space = free_space; 327 328 for (i = 0; i < n; i++) { 329 /* copy previous fill into linked list */ 330 nzi = 0; 331 nnz = ai[r[i] + 1] - ai[r[i]]; 332 ajtmp = aj + ai[r[i]]; 333 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 334 nzi += nlnk; 335 336 /* add pivot rows into linked list */ 337 row = lnk[n]; 338 while (row < i) { 339 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 340 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 341 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 342 nzi += nlnk; 343 row = lnk[row]; 344 } 345 bi[i + 1] = bi[i] + nzi; 346 im[i] = nzi; 347 348 /* mark bdiag */ 349 nzbd = 0; 350 nnz = nzi; 351 k = lnk[n]; 352 while (nnz-- && k < i) { 353 nzbd++; 354 k = lnk[k]; 355 } 356 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 357 358 /* if free space is not available, make more free space */ 359 if (current_space->local_remaining < nzi) { 360 /* estimated additional space needed */ 361 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi)); 362 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 363 reallocs++; 364 } 365 366 /* copy data into free space, then initialize lnk */ 367 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 368 369 bi_ptr[i] = current_space->array; 370 current_space->array += nzi; 371 current_space->local_used += nzi; 372 current_space->local_remaining -= nzi; 373 } 374 375 PetscCall(ISRestoreIndices(isrow, &r)); 376 PetscCall(ISRestoreIndices(isicol, &ic)); 377 378 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 379 PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj)); 380 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 381 PetscCall(PetscLLDestroy(lnk, lnkbt)); 382 PetscCall(PetscFree2(bi_ptr, im)); 383 384 /* put together the new matrix */ 385 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 386 b = (Mat_SeqAIJ *)B->data; 387 b->free_ij = PETSC_TRUE; 388 PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a)); 389 b->free_a = PETSC_TRUE; 390 b->j = bj; 391 b->i = bi; 392 b->diag = bdiag; 393 b->ilen = NULL; 394 b->imax = NULL; 395 b->row = isrow; 396 b->col = iscol; 397 PetscCall(PetscObjectReference((PetscObject)isrow)); 398 PetscCall(PetscObjectReference((PetscObject)iscol)); 399 b->icol = isicol; 400 PetscCall(PetscMalloc1(n, &b->solve_work)); 401 402 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 403 b->maxnz = b->nz = bdiag[0] + 1; 404 405 B->factortype = MAT_FACTOR_LU; 406 B->info.factor_mallocs = reallocs; 407 B->info.fill_ratio_given = f; 408 409 if (ai[n]) { 410 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 411 } else { 412 B->info.fill_ratio_needed = 0.0; 413 } 414 #if defined(PETSC_USE_INFO) 415 if (ai[n] != 0) { 416 PetscReal af = B->info.fill_ratio_needed; 417 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 418 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 419 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 420 PetscCall(PetscInfo(A, "for best performance.\n")); 421 } else { 422 PetscCall(PetscInfo(A, "Empty matrix\n")); 423 } 424 #endif 425 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 426 if (a->inode.size_csr) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 427 PetscCall(MatSeqAIJCheckInode_FactorLU(B)); 428 PetscFunctionReturn(PETSC_SUCCESS); 429 } 430 431 /* 432 Trouble in factorization, should we dump the original matrix? 433 */ 434 PetscErrorCode MatFactorDumpMatrix(Mat A) 435 { 436 PetscBool flg = PETSC_FALSE; 437 438 PetscFunctionBegin; 439 PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL)); 440 if (flg) { 441 PetscViewer viewer; 442 char filename[PETSC_MAX_PATH_LEN]; 443 444 PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank)); 445 PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer)); 446 PetscCall(MatView(A, viewer)); 447 PetscCall(PetscViewerDestroy(&viewer)); 448 } 449 PetscFunctionReturn(PETSC_SUCCESS); 450 } 451 452 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) 453 { 454 Mat C = B; 455 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 456 IS isrow = b->row, isicol = b->icol; 457 const PetscInt *r, *ic, *ics; 458 const PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag; 459 PetscInt i, j, k, nz, nzL, row, *pj; 460 const PetscInt *ajtmp, *bjtmp; 461 MatScalar *rtmp, *pc, multiplier, *pv; 462 const MatScalar *aa, *v; 463 MatScalar *ba; 464 PetscBool row_identity, col_identity; 465 FactorShiftCtx sctx; 466 const PetscInt *ddiag; 467 PetscReal rs; 468 MatScalar d; 469 470 PetscFunctionBegin; 471 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 472 PetscCall(MatSeqAIJGetArrayWrite(B, &ba)); 473 /* MatPivotSetUp(): initialize shift context sctx */ 474 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 475 476 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 477 ddiag = a->diag; 478 sctx.shift_top = info->zeropivot; 479 for (i = 0; i < n; i++) { 480 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 481 d = (aa)[ddiag[i]]; 482 rs = -PetscAbsScalar(d) - PetscRealPart(d); 483 v = aa + ai[i]; 484 nz = ai[i + 1] - ai[i]; 485 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 486 if (rs > sctx.shift_top) sctx.shift_top = rs; 487 } 488 sctx.shift_top *= 1.1; 489 sctx.nshift_max = 5; 490 sctx.shift_lo = 0.; 491 sctx.shift_hi = 1.; 492 } 493 494 PetscCall(ISGetIndices(isrow, &r)); 495 PetscCall(ISGetIndices(isicol, &ic)); 496 PetscCall(PetscMalloc1(n + 1, &rtmp)); 497 ics = ic; 498 499 do { 500 sctx.newshift = PETSC_FALSE; 501 for (i = 0; i < n; i++) { 502 /* zero rtmp */ 503 /* L part */ 504 nz = bi[i + 1] - bi[i]; 505 bjtmp = bj + bi[i]; 506 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 507 508 /* U part */ 509 nz = bdiag[i] - bdiag[i + 1]; 510 bjtmp = bj + bdiag[i + 1] + 1; 511 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 512 513 /* load in initial (unfactored row) */ 514 nz = ai[r[i] + 1] - ai[r[i]]; 515 ajtmp = aj + ai[r[i]]; 516 v = aa + ai[r[i]]; 517 for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 518 /* ZeropivotApply() */ 519 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 520 521 /* elimination */ 522 bjtmp = bj + bi[i]; 523 row = *bjtmp++; 524 nzL = bi[i + 1] - bi[i]; 525 for (k = 0; k < nzL; k++) { 526 pc = rtmp + row; 527 if (*pc != 0.0) { 528 pv = ba + bdiag[row]; 529 multiplier = *pc * (*pv); 530 *pc = multiplier; 531 532 pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ 533 pv = ba + bdiag[row + 1] + 1; 534 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */ 535 536 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 537 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 538 } 539 row = *bjtmp++; 540 } 541 542 /* finished row so stick it into b->a */ 543 rs = 0.0; 544 /* L part */ 545 pv = ba + bi[i]; 546 pj = b->j + bi[i]; 547 nz = bi[i + 1] - bi[i]; 548 for (j = 0; j < nz; j++) { 549 pv[j] = rtmp[pj[j]]; 550 rs += PetscAbsScalar(pv[j]); 551 } 552 553 /* U part */ 554 pv = ba + bdiag[i + 1] + 1; 555 pj = b->j + bdiag[i + 1] + 1; 556 nz = bdiag[i] - bdiag[i + 1] - 1; 557 for (j = 0; j < nz; j++) { 558 pv[j] = rtmp[pj[j]]; 559 rs += PetscAbsScalar(pv[j]); 560 } 561 562 sctx.rs = rs; 563 sctx.pv = rtmp[i]; 564 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 565 if (sctx.newshift) break; /* break for-loop */ 566 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 567 568 /* Mark diagonal and invert diagonal for simpler triangular solves */ 569 pv = ba + bdiag[i]; 570 *pv = 1.0 / rtmp[i]; 571 572 } /* endof for (i=0; i<n; i++) { */ 573 574 /* MatPivotRefine() */ 575 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 576 /* 577 * if no shift in this attempt & shifting & started shifting & can refine, 578 * then try lower shift 579 */ 580 sctx.shift_hi = sctx.shift_fraction; 581 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 582 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 583 sctx.newshift = PETSC_TRUE; 584 sctx.nshift++; 585 } 586 } while (sctx.newshift); 587 588 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 589 PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba)); 590 591 PetscCall(PetscFree(rtmp)); 592 PetscCall(ISRestoreIndices(isicol, &ic)); 593 PetscCall(ISRestoreIndices(isrow, &r)); 594 595 PetscCall(ISIdentity(isrow, &row_identity)); 596 PetscCall(ISIdentity(isicol, &col_identity)); 597 if (b->inode.size_csr) { 598 C->ops->solve = MatSolve_SeqAIJ_Inode; 599 } else if (row_identity && col_identity) { 600 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 601 } else { 602 C->ops->solve = MatSolve_SeqAIJ; 603 } 604 C->ops->solveadd = MatSolveAdd_SeqAIJ; 605 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 606 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 607 C->ops->matsolve = MatMatSolve_SeqAIJ; 608 C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ; 609 C->assembled = PETSC_TRUE; 610 C->preallocated = PETSC_TRUE; 611 612 PetscCall(PetscLogFlops(C->cmap->n)); 613 614 /* MatShiftView(A,info,&sctx) */ 615 if (sctx.nshift) { 616 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 617 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 618 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 619 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 620 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 621 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 622 } 623 } 624 PetscFunctionReturn(PETSC_SUCCESS); 625 } 626 627 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat); 628 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec); 629 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec); 630 631 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) 632 { 633 Mat C = B; 634 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 635 IS isrow = b->row, isicol = b->icol; 636 const PetscInt *r, *ic, *ics; 637 PetscInt nz, row, i, j, n = A->rmap->n, diag; 638 const PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 639 const PetscInt *ajtmp, *bjtmp, *diag_offset = b->diag, *pj; 640 MatScalar *pv, *rtmp, *pc, multiplier, d; 641 const MatScalar *v, *aa; 642 MatScalar *ba; 643 PetscReal rs = 0.0; 644 FactorShiftCtx sctx; 645 const PetscInt *ddiag; 646 PetscBool row_identity, col_identity; 647 648 PetscFunctionBegin; 649 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 650 PetscCall(MatSeqAIJGetArrayWrite(B, &ba)); 651 /* MatPivotSetUp(): initialize shift context sctx */ 652 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 653 654 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 655 ddiag = a->diag; 656 sctx.shift_top = info->zeropivot; 657 for (i = 0; i < n; i++) { 658 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 659 d = (aa)[ddiag[i]]; 660 rs = -PetscAbsScalar(d) - PetscRealPart(d); 661 v = aa + ai[i]; 662 nz = ai[i + 1] - ai[i]; 663 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 664 if (rs > sctx.shift_top) sctx.shift_top = rs; 665 } 666 sctx.shift_top *= 1.1; 667 sctx.nshift_max = 5; 668 sctx.shift_lo = 0.; 669 sctx.shift_hi = 1.; 670 } 671 672 PetscCall(ISGetIndices(isrow, &r)); 673 PetscCall(ISGetIndices(isicol, &ic)); 674 PetscCall(PetscMalloc1(n + 1, &rtmp)); 675 ics = ic; 676 677 do { 678 sctx.newshift = PETSC_FALSE; 679 for (i = 0; i < n; i++) { 680 nz = bi[i + 1] - bi[i]; 681 bjtmp = bj + bi[i]; 682 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 683 684 /* load in initial (unfactored row) */ 685 nz = ai[r[i] + 1] - ai[r[i]]; 686 ajtmp = aj + ai[r[i]]; 687 v = aa + ai[r[i]]; 688 for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 689 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 690 691 row = *bjtmp++; 692 while (row < i) { 693 pc = rtmp + row; 694 if (*pc != 0.0) { 695 pv = ba + diag_offset[row]; 696 pj = b->j + diag_offset[row] + 1; 697 multiplier = *pc / *pv++; 698 *pc = multiplier; 699 nz = bi[row + 1] - diag_offset[row] - 1; 700 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 701 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 702 } 703 row = *bjtmp++; 704 } 705 /* finished row so stick it into b->a */ 706 pv = ba + bi[i]; 707 pj = b->j + bi[i]; 708 nz = bi[i + 1] - bi[i]; 709 diag = diag_offset[i] - bi[i]; 710 rs = 0.0; 711 for (j = 0; j < nz; j++) { 712 pv[j] = rtmp[pj[j]]; 713 rs += PetscAbsScalar(pv[j]); 714 } 715 rs -= PetscAbsScalar(pv[diag]); 716 717 sctx.rs = rs; 718 sctx.pv = pv[diag]; 719 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 720 if (sctx.newshift) break; 721 pv[diag] = sctx.pv; 722 } 723 724 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 725 /* 726 * if no shift in this attempt & shifting & started shifting & can refine, 727 * then try lower shift 728 */ 729 sctx.shift_hi = sctx.shift_fraction; 730 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 731 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 732 sctx.newshift = PETSC_TRUE; 733 sctx.nshift++; 734 } 735 } while (sctx.newshift); 736 737 /* invert diagonal entries for simpler triangular solves */ 738 for (i = 0; i < n; i++) ba[diag_offset[i]] = 1.0 / ba[diag_offset[i]]; 739 PetscCall(PetscFree(rtmp)); 740 PetscCall(ISRestoreIndices(isicol, &ic)); 741 PetscCall(ISRestoreIndices(isrow, &r)); 742 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 743 PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba)); 744 745 PetscCall(ISIdentity(isrow, &row_identity)); 746 PetscCall(ISIdentity(isicol, &col_identity)); 747 if (row_identity && col_identity) { 748 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 749 } else { 750 C->ops->solve = MatSolve_SeqAIJ_inplace; 751 } 752 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 753 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 754 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 755 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 756 C->ops->matsolvetranspose = NULL; 757 758 C->assembled = PETSC_TRUE; 759 C->preallocated = PETSC_TRUE; 760 761 PetscCall(PetscLogFlops(C->cmap->n)); 762 if (sctx.nshift) { 763 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 764 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 765 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 766 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 767 } 768 } 769 C->ops->solve = MatSolve_SeqAIJ_inplace; 770 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 771 772 PetscCall(MatSeqAIJCheckInode(C)); 773 PetscFunctionReturn(PETSC_SUCCESS); 774 } 775 776 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec); 777 778 /* 779 This routine implements inplace ILU(0) with row or/and column permutations. 780 Input: 781 A - original matrix 782 Output; 783 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 784 a->j (col index) is permuted by the inverse of colperm, then sorted 785 a->a reordered accordingly with a->j 786 a->diag (ptr to diagonal elements) is updated. 787 */ 788 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info) 789 { 790 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 791 IS isrow = a->row, isicol = a->icol; 792 const PetscInt *r, *ic, *ics; 793 PetscInt i, j, n = A->rmap->n, *ai = a->i, *aj = a->j; 794 PetscInt *ajtmp, nz, row; 795 PetscInt *diag = a->diag, nbdiag, *pj; 796 PetscScalar *rtmp, *pc, multiplier, d; 797 MatScalar *pv, *v; 798 PetscReal rs; 799 FactorShiftCtx sctx; 800 MatScalar *aa, *vtmp; 801 802 PetscFunctionBegin; 803 PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address"); 804 805 PetscCall(MatSeqAIJGetArray(A, &aa)); 806 /* MatPivotSetUp(): initialize shift context sctx */ 807 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 808 809 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 810 const PetscInt *ddiag = a->diag; 811 sctx.shift_top = info->zeropivot; 812 for (i = 0; i < n; i++) { 813 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 814 d = (aa)[ddiag[i]]; 815 rs = -PetscAbsScalar(d) - PetscRealPart(d); 816 vtmp = aa + ai[i]; 817 nz = ai[i + 1] - ai[i]; 818 for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]); 819 if (rs > sctx.shift_top) sctx.shift_top = rs; 820 } 821 sctx.shift_top *= 1.1; 822 sctx.nshift_max = 5; 823 sctx.shift_lo = 0.; 824 sctx.shift_hi = 1.; 825 } 826 827 PetscCall(ISGetIndices(isrow, &r)); 828 PetscCall(ISGetIndices(isicol, &ic)); 829 PetscCall(PetscMalloc1(n + 1, &rtmp)); 830 PetscCall(PetscArrayzero(rtmp, n + 1)); 831 ics = ic; 832 833 #if defined(MV) 834 sctx.shift_top = 0.; 835 sctx.nshift_max = 0; 836 sctx.shift_lo = 0.; 837 sctx.shift_hi = 0.; 838 sctx.shift_fraction = 0.; 839 840 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 841 sctx.shift_top = 0.; 842 for (i = 0; i < n; i++) { 843 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 844 d = aa[diag[i]]; 845 rs = -PetscAbsScalar(d) - PetscRealPart(d); 846 v = aa + ai[i]; 847 nz = ai[i + 1] - ai[i]; 848 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 849 if (rs > sctx.shift_top) sctx.shift_top = rs; 850 } 851 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 852 sctx.shift_top *= 1.1; 853 sctx.nshift_max = 5; 854 sctx.shift_lo = 0.; 855 sctx.shift_hi = 1.; 856 } 857 858 sctx.shift_amount = 0.; 859 sctx.nshift = 0; 860 #endif 861 862 do { 863 sctx.newshift = PETSC_FALSE; 864 for (i = 0; i < n; i++) { 865 /* load in initial unfactored row */ 866 nz = ai[r[i] + 1] - ai[r[i]]; 867 ajtmp = aj + ai[r[i]]; 868 v = aa + ai[r[i]]; 869 /* sort permuted ajtmp and values v accordingly */ 870 for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]]; 871 PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v)); 872 873 diag[r[i]] = ai[r[i]]; 874 for (j = 0; j < nz; j++) { 875 rtmp[ajtmp[j]] = v[j]; 876 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 877 } 878 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 879 880 row = *ajtmp++; 881 while (row < i) { 882 pc = rtmp + row; 883 if (*pc != 0.0) { 884 pv = aa + diag[r[row]]; 885 pj = aj + diag[r[row]] + 1; 886 887 multiplier = *pc / *pv++; 888 *pc = multiplier; 889 nz = ai[r[row] + 1] - diag[r[row]] - 1; 890 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 891 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 892 } 893 row = *ajtmp++; 894 } 895 /* finished row so overwrite it onto aa */ 896 pv = aa + ai[r[i]]; 897 pj = aj + ai[r[i]]; 898 nz = ai[r[i] + 1] - ai[r[i]]; 899 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 900 901 rs = 0.0; 902 for (j = 0; j < nz; j++) { 903 pv[j] = rtmp[pj[j]]; 904 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 905 } 906 907 sctx.rs = rs; 908 sctx.pv = pv[nbdiag]; 909 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 910 if (sctx.newshift) break; 911 pv[nbdiag] = sctx.pv; 912 } 913 914 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 915 /* 916 * if no shift in this attempt & shifting & started shifting & can refine, 917 * then try lower shift 918 */ 919 sctx.shift_hi = sctx.shift_fraction; 920 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 921 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 922 sctx.newshift = PETSC_TRUE; 923 sctx.nshift++; 924 } 925 } while (sctx.newshift); 926 927 /* invert diagonal entries for simpler triangular solves */ 928 for (i = 0; i < n; i++) aa[diag[r[i]]] = 1.0 / aa[diag[r[i]]]; 929 930 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 931 PetscCall(PetscFree(rtmp)); 932 PetscCall(ISRestoreIndices(isicol, &ic)); 933 PetscCall(ISRestoreIndices(isrow, &r)); 934 935 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 936 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 937 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 938 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 939 940 A->assembled = PETSC_TRUE; 941 A->preallocated = PETSC_TRUE; 942 943 PetscCall(PetscLogFlops(A->cmap->n)); 944 if (sctx.nshift) { 945 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 946 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 947 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 948 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 949 } 950 } 951 PetscFunctionReturn(PETSC_SUCCESS); 952 } 953 954 PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info) 955 { 956 Mat C; 957 958 PetscFunctionBegin; 959 PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C)); 960 PetscCall(MatLUFactorSymbolic(C, A, row, col, info)); 961 PetscCall(MatLUFactorNumeric(C, A, info)); 962 963 A->ops->solve = C->ops->solve; 964 A->ops->solvetranspose = C->ops->solvetranspose; 965 966 PetscCall(MatHeaderMerge(A, &C)); 967 PetscFunctionReturn(PETSC_SUCCESS); 968 } 969 970 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) 971 { 972 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 973 IS iscol = a->col, isrow = a->row; 974 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 975 PetscInt nz; 976 const PetscInt *rout, *cout, *r, *c; 977 PetscScalar *x, *tmp, *tmps, sum; 978 const PetscScalar *b; 979 const MatScalar *aa, *v; 980 981 PetscFunctionBegin; 982 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 983 984 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 985 PetscCall(VecGetArrayRead(bb, &b)); 986 PetscCall(VecGetArrayWrite(xx, &x)); 987 tmp = a->solve_work; 988 989 PetscCall(ISGetIndices(isrow, &rout)); 990 r = rout; 991 PetscCall(ISGetIndices(iscol, &cout)); 992 c = cout + (n - 1); 993 994 /* forward solve the lower triangular */ 995 tmp[0] = b[*r++]; 996 tmps = tmp; 997 for (i = 1; i < n; i++) { 998 v = aa + ai[i]; 999 vi = aj + ai[i]; 1000 nz = a->diag[i] - ai[i]; 1001 sum = b[*r++]; 1002 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1003 tmp[i] = sum; 1004 } 1005 1006 /* backward solve the upper triangular */ 1007 for (i = n - 1; i >= 0; i--) { 1008 v = aa + a->diag[i] + 1; 1009 vi = aj + a->diag[i] + 1; 1010 nz = ai[i + 1] - a->diag[i] - 1; 1011 sum = tmp[i]; 1012 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1013 x[*c--] = tmp[i] = sum * aa[a->diag[i]]; 1014 } 1015 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 1016 PetscCall(ISRestoreIndices(isrow, &rout)); 1017 PetscCall(ISRestoreIndices(iscol, &cout)); 1018 PetscCall(VecRestoreArrayRead(bb, &b)); 1019 PetscCall(VecRestoreArrayWrite(xx, &x)); 1020 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1021 PetscFunctionReturn(PETSC_SUCCESS); 1022 } 1023 1024 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X) 1025 { 1026 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1027 IS iscol = a->col, isrow = a->row; 1028 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 1029 PetscInt nz, neq, ldb, ldx; 1030 const PetscInt *rout, *cout, *r, *c; 1031 PetscScalar *x, *tmp = a->solve_work, *tmps, sum; 1032 const PetscScalar *b, *aa, *v; 1033 PetscBool isdense; 1034 1035 PetscFunctionBegin; 1036 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1037 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 1038 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1039 if (X != B) { 1040 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 1041 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1042 } 1043 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1044 PetscCall(MatDenseGetArrayRead(B, &b)); 1045 PetscCall(MatDenseGetLDA(B, &ldb)); 1046 PetscCall(MatDenseGetArray(X, &x)); 1047 PetscCall(MatDenseGetLDA(X, &ldx)); 1048 PetscCall(ISGetIndices(isrow, &rout)); 1049 r = rout; 1050 PetscCall(ISGetIndices(iscol, &cout)); 1051 c = cout; 1052 for (neq = 0; neq < B->cmap->n; neq++) { 1053 /* forward solve the lower triangular */ 1054 tmp[0] = b[r[0]]; 1055 tmps = tmp; 1056 for (i = 1; i < n; i++) { 1057 v = aa + ai[i]; 1058 vi = aj + ai[i]; 1059 nz = a->diag[i] - ai[i]; 1060 sum = b[r[i]]; 1061 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1062 tmp[i] = sum; 1063 } 1064 /* backward solve the upper triangular */ 1065 for (i = n - 1; i >= 0; i--) { 1066 v = aa + a->diag[i] + 1; 1067 vi = aj + a->diag[i] + 1; 1068 nz = ai[i + 1] - a->diag[i] - 1; 1069 sum = tmp[i]; 1070 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1071 x[c[i]] = tmp[i] = sum * aa[a->diag[i]]; 1072 } 1073 b += ldb; 1074 x += ldx; 1075 } 1076 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 1077 PetscCall(ISRestoreIndices(isrow, &rout)); 1078 PetscCall(ISRestoreIndices(iscol, &cout)); 1079 PetscCall(MatDenseRestoreArrayRead(B, &b)); 1080 PetscCall(MatDenseRestoreArray(X, &x)); 1081 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 1082 PetscFunctionReturn(PETSC_SUCCESS); 1083 } 1084 1085 PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X) 1086 { 1087 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1088 IS iscol = a->col, isrow = a->row; 1089 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 1090 PetscInt nz, neq, ldb, ldx; 1091 const PetscInt *rout, *cout, *r, *c; 1092 PetscScalar *x, *tmp = a->solve_work, sum; 1093 const PetscScalar *b, *aa, *v; 1094 PetscBool isdense; 1095 1096 PetscFunctionBegin; 1097 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1098 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 1099 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1100 if (X != B) { 1101 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 1102 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1103 } 1104 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1105 PetscCall(MatDenseGetArrayRead(B, &b)); 1106 PetscCall(MatDenseGetLDA(B, &ldb)); 1107 PetscCall(MatDenseGetArray(X, &x)); 1108 PetscCall(MatDenseGetLDA(X, &ldx)); 1109 PetscCall(ISGetIndices(isrow, &rout)); 1110 r = rout; 1111 PetscCall(ISGetIndices(iscol, &cout)); 1112 c = cout; 1113 for (neq = 0; neq < B->cmap->n; neq++) { 1114 /* forward solve the lower triangular */ 1115 tmp[0] = b[r[0]]; 1116 v = aa; 1117 vi = aj; 1118 for (i = 1; i < n; i++) { 1119 nz = ai[i + 1] - ai[i]; 1120 sum = b[r[i]]; 1121 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 1122 tmp[i] = sum; 1123 v += nz; 1124 vi += nz; 1125 } 1126 /* backward solve the upper triangular */ 1127 for (i = n - 1; i >= 0; i--) { 1128 v = aa + adiag[i + 1] + 1; 1129 vi = aj + adiag[i + 1] + 1; 1130 nz = adiag[i] - adiag[i + 1] - 1; 1131 sum = tmp[i]; 1132 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 1133 x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 1134 } 1135 b += ldb; 1136 x += ldx; 1137 } 1138 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 1139 PetscCall(ISRestoreIndices(isrow, &rout)); 1140 PetscCall(ISRestoreIndices(iscol, &cout)); 1141 PetscCall(MatDenseRestoreArrayRead(B, &b)); 1142 PetscCall(MatDenseRestoreArray(X, &x)); 1143 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 1144 PetscFunctionReturn(PETSC_SUCCESS); 1145 } 1146 1147 PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X) 1148 { 1149 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1150 IS iscol = a->col, isrow = a->row; 1151 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, j; 1152 PetscInt nz, neq, ldb, ldx; 1153 const PetscInt *rout, *cout, *r, *c; 1154 PetscScalar *x, *tmp = a->solve_work, s1; 1155 const PetscScalar *b, *aa, *v; 1156 PetscBool isdense; 1157 1158 PetscFunctionBegin; 1159 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1160 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 1161 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 1162 if (X != B) { 1163 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 1164 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 1165 } 1166 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1167 PetscCall(MatDenseGetArrayRead(B, &b)); 1168 PetscCall(MatDenseGetLDA(B, &ldb)); 1169 PetscCall(MatDenseGetArray(X, &x)); 1170 PetscCall(MatDenseGetLDA(X, &ldx)); 1171 PetscCall(ISGetIndices(isrow, &rout)); 1172 r = rout; 1173 PetscCall(ISGetIndices(iscol, &cout)); 1174 c = cout; 1175 for (neq = 0; neq < B->cmap->n; neq++) { 1176 /* copy the b into temp work space according to permutation */ 1177 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1178 1179 /* forward solve the U^T */ 1180 for (i = 0; i < n; i++) { 1181 v = aa + adiag[i + 1] + 1; 1182 vi = aj + adiag[i + 1] + 1; 1183 nz = adiag[i] - adiag[i + 1] - 1; 1184 s1 = tmp[i]; 1185 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1186 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1187 tmp[i] = s1; 1188 } 1189 1190 /* backward solve the L^T */ 1191 for (i = n - 1; i >= 0; i--) { 1192 v = aa + ai[i]; 1193 vi = aj + ai[i]; 1194 nz = ai[i + 1] - ai[i]; 1195 s1 = tmp[i]; 1196 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1197 } 1198 1199 /* copy tmp into x according to permutation */ 1200 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1201 b += ldb; 1202 x += ldx; 1203 } 1204 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 1205 PetscCall(ISRestoreIndices(isrow, &rout)); 1206 PetscCall(ISRestoreIndices(iscol, &cout)); 1207 PetscCall(MatDenseRestoreArrayRead(B, &b)); 1208 PetscCall(MatDenseRestoreArray(X, &x)); 1209 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 1210 PetscFunctionReturn(PETSC_SUCCESS); 1211 } 1212 1213 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx) 1214 { 1215 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1216 IS iscol = a->col, isrow = a->row; 1217 const PetscInt *r, *c, *rout, *cout; 1218 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 1219 PetscInt nz, row; 1220 PetscScalar *x, *tmp, *tmps, sum; 1221 const PetscScalar *b; 1222 const MatScalar *aa, *v; 1223 1224 PetscFunctionBegin; 1225 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1226 1227 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1228 PetscCall(VecGetArrayRead(bb, &b)); 1229 PetscCall(VecGetArrayWrite(xx, &x)); 1230 tmp = a->solve_work; 1231 1232 PetscCall(ISGetIndices(isrow, &rout)); 1233 r = rout; 1234 PetscCall(ISGetIndices(iscol, &cout)); 1235 c = cout + (n - 1); 1236 1237 /* forward solve the lower triangular */ 1238 tmp[0] = b[*r++]; 1239 tmps = tmp; 1240 for (row = 1; row < n; row++) { 1241 i = rout[row]; /* permuted row */ 1242 v = aa + ai[i]; 1243 vi = aj + ai[i]; 1244 nz = a->diag[i] - ai[i]; 1245 sum = b[*r++]; 1246 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1247 tmp[row] = sum; 1248 } 1249 1250 /* backward solve the upper triangular */ 1251 for (row = n - 1; row >= 0; row--) { 1252 i = rout[row]; /* permuted row */ 1253 v = aa + a->diag[i] + 1; 1254 vi = aj + a->diag[i] + 1; 1255 nz = ai[i + 1] - a->diag[i] - 1; 1256 sum = tmp[row]; 1257 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1258 x[*c--] = tmp[row] = sum * aa[a->diag[i]]; 1259 } 1260 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 1261 PetscCall(ISRestoreIndices(isrow, &rout)); 1262 PetscCall(ISRestoreIndices(iscol, &cout)); 1263 PetscCall(VecRestoreArrayRead(bb, &b)); 1264 PetscCall(VecRestoreArrayWrite(xx, &x)); 1265 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1266 PetscFunctionReturn(PETSC_SUCCESS); 1267 } 1268 1269 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1270 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) 1271 { 1272 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1273 PetscInt n = A->rmap->n; 1274 const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag; 1275 PetscScalar *x; 1276 const PetscScalar *b; 1277 const MatScalar *aa; 1278 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1279 PetscInt adiag_i, i, nz, ai_i; 1280 const PetscInt *vi; 1281 const MatScalar *v; 1282 PetscScalar sum; 1283 #endif 1284 1285 PetscFunctionBegin; 1286 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1287 1288 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1289 PetscCall(VecGetArrayRead(bb, &b)); 1290 PetscCall(VecGetArrayWrite(xx, &x)); 1291 1292 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1293 fortransolveaij_(&n, x, ai, aj, adiag, aa, b); 1294 #else 1295 /* forward solve the lower triangular */ 1296 x[0] = b[0]; 1297 for (i = 1; i < n; i++) { 1298 ai_i = ai[i]; 1299 v = aa + ai_i; 1300 vi = aj + ai_i; 1301 nz = adiag[i] - ai_i; 1302 sum = b[i]; 1303 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1304 x[i] = sum; 1305 } 1306 1307 /* backward solve the upper triangular */ 1308 for (i = n - 1; i >= 0; i--) { 1309 adiag_i = adiag[i]; 1310 v = aa + adiag_i + 1; 1311 vi = aj + adiag_i + 1; 1312 nz = ai[i + 1] - adiag_i - 1; 1313 sum = x[i]; 1314 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1315 x[i] = sum * aa[adiag_i]; 1316 } 1317 #endif 1318 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1319 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1320 PetscCall(VecRestoreArrayRead(bb, &b)); 1321 PetscCall(VecRestoreArrayWrite(xx, &x)); 1322 PetscFunctionReturn(PETSC_SUCCESS); 1323 } 1324 1325 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx) 1326 { 1327 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1328 IS iscol = a->col, isrow = a->row; 1329 PetscInt i, n = A->rmap->n, j; 1330 PetscInt nz; 1331 const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j; 1332 PetscScalar *x, *tmp, sum; 1333 const PetscScalar *b; 1334 const MatScalar *aa, *v; 1335 1336 PetscFunctionBegin; 1337 if (yy != xx) PetscCall(VecCopy(yy, xx)); 1338 1339 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1340 PetscCall(VecGetArrayRead(bb, &b)); 1341 PetscCall(VecGetArray(xx, &x)); 1342 tmp = a->solve_work; 1343 1344 PetscCall(ISGetIndices(isrow, &rout)); 1345 r = rout; 1346 PetscCall(ISGetIndices(iscol, &cout)); 1347 c = cout + (n - 1); 1348 1349 /* forward solve the lower triangular */ 1350 tmp[0] = b[*r++]; 1351 for (i = 1; i < n; i++) { 1352 v = aa + ai[i]; 1353 vi = aj + ai[i]; 1354 nz = a->diag[i] - ai[i]; 1355 sum = b[*r++]; 1356 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1357 tmp[i] = sum; 1358 } 1359 1360 /* backward solve the upper triangular */ 1361 for (i = n - 1; i >= 0; i--) { 1362 v = aa + a->diag[i] + 1; 1363 vi = aj + a->diag[i] + 1; 1364 nz = ai[i + 1] - a->diag[i] - 1; 1365 sum = tmp[i]; 1366 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1367 tmp[i] = sum * aa[a->diag[i]]; 1368 x[*c--] += tmp[i]; 1369 } 1370 1371 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 1372 PetscCall(ISRestoreIndices(isrow, &rout)); 1373 PetscCall(ISRestoreIndices(iscol, &cout)); 1374 PetscCall(VecRestoreArrayRead(bb, &b)); 1375 PetscCall(VecRestoreArray(xx, &x)); 1376 PetscCall(PetscLogFlops(2.0 * a->nz)); 1377 PetscFunctionReturn(PETSC_SUCCESS); 1378 } 1379 1380 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx) 1381 { 1382 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1383 IS iscol = a->col, isrow = a->row; 1384 PetscInt i, n = A->rmap->n, j; 1385 PetscInt nz; 1386 const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag; 1387 PetscScalar *x, *tmp, sum; 1388 const PetscScalar *b; 1389 const MatScalar *aa, *v; 1390 1391 PetscFunctionBegin; 1392 if (yy != xx) PetscCall(VecCopy(yy, xx)); 1393 1394 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1395 PetscCall(VecGetArrayRead(bb, &b)); 1396 PetscCall(VecGetArray(xx, &x)); 1397 tmp = a->solve_work; 1398 1399 PetscCall(ISGetIndices(isrow, &rout)); 1400 r = rout; 1401 PetscCall(ISGetIndices(iscol, &cout)); 1402 c = cout; 1403 1404 /* forward solve the lower triangular */ 1405 tmp[0] = b[r[0]]; 1406 v = aa; 1407 vi = aj; 1408 for (i = 1; i < n; i++) { 1409 nz = ai[i + 1] - ai[i]; 1410 sum = b[r[i]]; 1411 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1412 tmp[i] = sum; 1413 v += nz; 1414 vi += nz; 1415 } 1416 1417 /* backward solve the upper triangular */ 1418 v = aa + adiag[n - 1]; 1419 vi = aj + adiag[n - 1]; 1420 for (i = n - 1; i >= 0; i--) { 1421 nz = adiag[i] - adiag[i + 1] - 1; 1422 sum = tmp[i]; 1423 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1424 tmp[i] = sum * v[nz]; 1425 x[c[i]] += tmp[i]; 1426 v += nz + 1; 1427 vi += nz + 1; 1428 } 1429 1430 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 1431 PetscCall(ISRestoreIndices(isrow, &rout)); 1432 PetscCall(ISRestoreIndices(iscol, &cout)); 1433 PetscCall(VecRestoreArrayRead(bb, &b)); 1434 PetscCall(VecRestoreArray(xx, &x)); 1435 PetscCall(PetscLogFlops(2.0 * a->nz)); 1436 PetscFunctionReturn(PETSC_SUCCESS); 1437 } 1438 1439 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) 1440 { 1441 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1442 IS iscol = a->col, isrow = a->row; 1443 const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 1444 PetscInt i, n = A->rmap->n, j; 1445 PetscInt nz; 1446 PetscScalar *x, *tmp, s1; 1447 const MatScalar *aa, *v; 1448 const PetscScalar *b; 1449 1450 PetscFunctionBegin; 1451 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1452 PetscCall(VecGetArrayRead(bb, &b)); 1453 PetscCall(VecGetArrayWrite(xx, &x)); 1454 tmp = a->solve_work; 1455 1456 PetscCall(ISGetIndices(isrow, &rout)); 1457 r = rout; 1458 PetscCall(ISGetIndices(iscol, &cout)); 1459 c = cout; 1460 1461 /* copy the b into temp work space according to permutation */ 1462 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1463 1464 /* forward solve the U^T */ 1465 for (i = 0; i < n; i++) { 1466 v = aa + diag[i]; 1467 vi = aj + diag[i] + 1; 1468 nz = ai[i + 1] - diag[i] - 1; 1469 s1 = tmp[i]; 1470 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1471 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1472 tmp[i] = s1; 1473 } 1474 1475 /* backward solve the L^T */ 1476 for (i = n - 1; i >= 0; i--) { 1477 v = aa + diag[i] - 1; 1478 vi = aj + diag[i] - 1; 1479 nz = diag[i] - ai[i]; 1480 s1 = tmp[i]; 1481 for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1482 } 1483 1484 /* copy tmp into x according to permutation */ 1485 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1486 1487 PetscCall(ISRestoreIndices(isrow, &rout)); 1488 PetscCall(ISRestoreIndices(iscol, &cout)); 1489 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 1490 PetscCall(VecRestoreArrayRead(bb, &b)); 1491 PetscCall(VecRestoreArrayWrite(xx, &x)); 1492 1493 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1494 PetscFunctionReturn(PETSC_SUCCESS); 1495 } 1496 1497 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx) 1498 { 1499 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1500 IS iscol = a->col, isrow = a->row; 1501 const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi; 1502 PetscInt i, n = A->rmap->n, j; 1503 PetscInt nz; 1504 PetscScalar *x, *tmp, s1; 1505 const MatScalar *aa, *v; 1506 const PetscScalar *b; 1507 1508 PetscFunctionBegin; 1509 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1510 PetscCall(VecGetArrayRead(bb, &b)); 1511 PetscCall(VecGetArrayWrite(xx, &x)); 1512 tmp = a->solve_work; 1513 1514 PetscCall(ISGetIndices(isrow, &rout)); 1515 r = rout; 1516 PetscCall(ISGetIndices(iscol, &cout)); 1517 c = cout; 1518 1519 /* copy the b into temp work space according to permutation */ 1520 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1521 1522 /* forward solve the U^T */ 1523 for (i = 0; i < n; i++) { 1524 v = aa + adiag[i + 1] + 1; 1525 vi = aj + adiag[i + 1] + 1; 1526 nz = adiag[i] - adiag[i + 1] - 1; 1527 s1 = tmp[i]; 1528 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1529 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1530 tmp[i] = s1; 1531 } 1532 1533 /* backward solve the L^T */ 1534 for (i = n - 1; i >= 0; i--) { 1535 v = aa + ai[i]; 1536 vi = aj + ai[i]; 1537 nz = ai[i + 1] - ai[i]; 1538 s1 = tmp[i]; 1539 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1540 } 1541 1542 /* copy tmp into x according to permutation */ 1543 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1544 1545 PetscCall(ISRestoreIndices(isrow, &rout)); 1546 PetscCall(ISRestoreIndices(iscol, &cout)); 1547 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 1548 PetscCall(VecRestoreArrayRead(bb, &b)); 1549 PetscCall(VecRestoreArrayWrite(xx, &x)); 1550 1551 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1552 PetscFunctionReturn(PETSC_SUCCESS); 1553 } 1554 1555 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx) 1556 { 1557 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1558 IS iscol = a->col, isrow = a->row; 1559 const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 1560 PetscInt i, n = A->rmap->n, j; 1561 PetscInt nz; 1562 PetscScalar *x, *tmp, s1; 1563 const MatScalar *aa, *v; 1564 const PetscScalar *b; 1565 1566 PetscFunctionBegin; 1567 if (zz != xx) PetscCall(VecCopy(zz, xx)); 1568 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1569 PetscCall(VecGetArrayRead(bb, &b)); 1570 PetscCall(VecGetArray(xx, &x)); 1571 tmp = a->solve_work; 1572 1573 PetscCall(ISGetIndices(isrow, &rout)); 1574 r = rout; 1575 PetscCall(ISGetIndices(iscol, &cout)); 1576 c = cout; 1577 1578 /* copy the b into temp work space according to permutation */ 1579 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1580 1581 /* forward solve the U^T */ 1582 for (i = 0; i < n; i++) { 1583 v = aa + diag[i]; 1584 vi = aj + diag[i] + 1; 1585 nz = ai[i + 1] - diag[i] - 1; 1586 s1 = tmp[i]; 1587 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1588 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1589 tmp[i] = s1; 1590 } 1591 1592 /* backward solve the L^T */ 1593 for (i = n - 1; i >= 0; i--) { 1594 v = aa + diag[i] - 1; 1595 vi = aj + diag[i] - 1; 1596 nz = diag[i] - ai[i]; 1597 s1 = tmp[i]; 1598 for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1599 } 1600 1601 /* copy tmp into x according to permutation */ 1602 for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1603 1604 PetscCall(ISRestoreIndices(isrow, &rout)); 1605 PetscCall(ISRestoreIndices(iscol, &cout)); 1606 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 1607 PetscCall(VecRestoreArrayRead(bb, &b)); 1608 PetscCall(VecRestoreArray(xx, &x)); 1609 1610 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1611 PetscFunctionReturn(PETSC_SUCCESS); 1612 } 1613 1614 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx) 1615 { 1616 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1617 IS iscol = a->col, isrow = a->row; 1618 const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi; 1619 PetscInt i, n = A->rmap->n, j; 1620 PetscInt nz; 1621 PetscScalar *x, *tmp, s1; 1622 const MatScalar *aa, *v; 1623 const PetscScalar *b; 1624 1625 PetscFunctionBegin; 1626 if (zz != xx) PetscCall(VecCopy(zz, xx)); 1627 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1628 PetscCall(VecGetArrayRead(bb, &b)); 1629 PetscCall(VecGetArray(xx, &x)); 1630 tmp = a->solve_work; 1631 1632 PetscCall(ISGetIndices(isrow, &rout)); 1633 r = rout; 1634 PetscCall(ISGetIndices(iscol, &cout)); 1635 c = cout; 1636 1637 /* copy the b into temp work space according to permutation */ 1638 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1639 1640 /* forward solve the U^T */ 1641 for (i = 0; i < n; i++) { 1642 v = aa + adiag[i + 1] + 1; 1643 vi = aj + adiag[i + 1] + 1; 1644 nz = adiag[i] - adiag[i + 1] - 1; 1645 s1 = tmp[i]; 1646 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1647 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1648 tmp[i] = s1; 1649 } 1650 1651 /* backward solve the L^T */ 1652 for (i = n - 1; i >= 0; i--) { 1653 v = aa + ai[i]; 1654 vi = aj + ai[i]; 1655 nz = ai[i + 1] - ai[i]; 1656 s1 = tmp[i]; 1657 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1658 } 1659 1660 /* copy tmp into x according to permutation */ 1661 for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1662 1663 PetscCall(ISRestoreIndices(isrow, &rout)); 1664 PetscCall(ISRestoreIndices(iscol, &cout)); 1665 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 1666 PetscCall(VecRestoreArrayRead(bb, &b)); 1667 PetscCall(VecRestoreArray(xx, &x)); 1668 1669 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1670 PetscFunctionReturn(PETSC_SUCCESS); 1671 } 1672 1673 /* 1674 ilu() under revised new data structure. 1675 Factored arrays bj and ba are stored as 1676 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1677 1678 bi=fact->i is an array of size n+1, in which 1679 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1680 bi[n]: points to L(n-1,n-1)+1 1681 1682 bdiag=fact->diag is an array of size n+1,in which 1683 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1684 bdiag[n]: points to entry of U(n-1,0)-1 1685 1686 U(i,:) contains bdiag[i] as its last entry, i.e., 1687 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1688 */ 1689 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1690 { 1691 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1692 const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag; 1693 PetscInt i, j, k = 0, nz, *bi, *bj, *bdiag; 1694 IS isicol; 1695 1696 PetscFunctionBegin; 1697 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1698 PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE)); 1699 b = (Mat_SeqAIJ *)fact->data; 1700 1701 /* allocate matrix arrays for new data structure */ 1702 PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscScalar), (void **)&b->a)); 1703 PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscInt), (void **)&b->j)); 1704 PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&b->i)); 1705 if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n])); 1706 b->free_a = PETSC_TRUE; 1707 b->free_ij = PETSC_TRUE; 1708 1709 if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag)); 1710 bdiag = b->diag; 1711 1712 /* set bi and bj with new data structure */ 1713 bi = b->i; 1714 bj = b->j; 1715 1716 /* L part */ 1717 bi[0] = 0; 1718 for (i = 0; i < n; i++) { 1719 nz = adiag[i] - ai[i]; 1720 bi[i + 1] = bi[i] + nz; 1721 aj = a->j + ai[i]; 1722 for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1723 } 1724 1725 /* U part */ 1726 bdiag[n] = bi[n] - 1; 1727 for (i = n - 1; i >= 0; i--) { 1728 nz = ai[i + 1] - adiag[i] - 1; 1729 aj = a->j + adiag[i] + 1; 1730 for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1731 /* diag[i] */ 1732 bj[k++] = i; 1733 bdiag[i] = bdiag[i + 1] + nz + 1; 1734 } 1735 1736 fact->factortype = MAT_FACTOR_ILU; 1737 fact->info.factor_mallocs = 0; 1738 fact->info.fill_ratio_given = info->fill; 1739 fact->info.fill_ratio_needed = 1.0; 1740 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1741 PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1742 1743 b = (Mat_SeqAIJ *)fact->data; 1744 b->row = isrow; 1745 b->col = iscol; 1746 b->icol = isicol; 1747 PetscCall(PetscMalloc1(fact->rmap->n, &b->solve_work)); 1748 PetscCall(PetscObjectReference((PetscObject)isrow)); 1749 PetscCall(PetscObjectReference((PetscObject)iscol)); 1750 PetscFunctionReturn(PETSC_SUCCESS); 1751 } 1752 1753 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1754 { 1755 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1756 IS isicol; 1757 const PetscInt *r, *ic; 1758 PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 1759 PetscInt *bi, *cols, nnz, *cols_lvl; 1760 PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 1761 PetscInt i, levels, diagonal_fill; 1762 PetscBool col_identity, row_identity, missing; 1763 PetscReal f; 1764 PetscInt nlnk, *lnk, *lnk_lvl = NULL; 1765 PetscBT lnkbt; 1766 PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 1767 PetscFreeSpaceList free_space = NULL, current_space = NULL; 1768 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1769 1770 PetscFunctionBegin; 1771 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 1772 PetscCall(MatMissingDiagonal(A, &missing, &i)); 1773 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 1774 1775 levels = (PetscInt)info->levels; 1776 PetscCall(ISIdentity(isrow, &row_identity)); 1777 PetscCall(ISIdentity(iscol, &col_identity)); 1778 if (!levels && row_identity && col_identity) { 1779 /* special case: ilu(0) with natural ordering */ 1780 PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info)); 1781 if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1782 PetscFunctionReturn(PETSC_SUCCESS); 1783 } 1784 1785 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1786 PetscCall(ISGetIndices(isrow, &r)); 1787 PetscCall(ISGetIndices(isicol, &ic)); 1788 1789 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1790 PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi)); 1791 PetscCall(PetscMalloc1(n + 1, &bdiag)); 1792 bi[0] = bdiag[0] = 0; 1793 PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 1794 1795 /* create a linked list for storing column indices of the active row */ 1796 nlnk = n + 1; 1797 PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 1798 1799 /* initial FreeSpace size is f*(ai[n]+1) */ 1800 f = info->fill; 1801 diagonal_fill = (PetscInt)info->diagonal_fill; 1802 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1803 current_space = free_space; 1804 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 1805 current_space_lvl = free_space_lvl; 1806 for (i = 0; i < n; i++) { 1807 nzi = 0; 1808 /* copy current row into linked list */ 1809 nnz = ai[r[i] + 1] - ai[r[i]]; 1810 PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 1811 cols = aj + ai[r[i]]; 1812 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1813 PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 1814 nzi += nlnk; 1815 1816 /* make sure diagonal entry is included */ 1817 if (diagonal_fill && lnk[i] == -1) { 1818 fm = n; 1819 while (lnk[fm] < i) fm = lnk[fm]; 1820 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1821 lnk[fm] = i; 1822 lnk_lvl[i] = 0; 1823 nzi++; 1824 dcount++; 1825 } 1826 1827 /* add pivot rows into the active row */ 1828 nzbd = 0; 1829 prow = lnk[n]; 1830 while (prow < i) { 1831 nnz = bdiag[prow]; 1832 cols = bj_ptr[prow] + nnz + 1; 1833 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1834 nnz = bi[prow + 1] - bi[prow] - nnz - 1; 1835 PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 1836 nzi += nlnk; 1837 prow = lnk[prow]; 1838 nzbd++; 1839 } 1840 bdiag[i] = nzbd; 1841 bi[i + 1] = bi[i] + nzi; 1842 /* if free space is not available, make more free space */ 1843 if (current_space->local_remaining < nzi) { 1844 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */ 1845 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 1846 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 1847 reallocs++; 1848 } 1849 1850 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1851 PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 1852 bj_ptr[i] = current_space->array; 1853 bjlvl_ptr[i] = current_space_lvl->array; 1854 1855 /* make sure the active row i has diagonal entry */ 1856 PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 1857 1858 current_space->array += nzi; 1859 current_space->local_used += nzi; 1860 current_space->local_remaining -= nzi; 1861 current_space_lvl->array += nzi; 1862 current_space_lvl->local_used += nzi; 1863 current_space_lvl->local_remaining -= nzi; 1864 } 1865 1866 PetscCall(ISRestoreIndices(isrow, &r)); 1867 PetscCall(ISRestoreIndices(isicol, &ic)); 1868 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1869 PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj)); 1870 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 1871 1872 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 1873 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 1874 PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 1875 1876 #if defined(PETSC_USE_INFO) 1877 { 1878 PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 1879 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 1880 PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 1881 PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 1882 PetscCall(PetscInfo(A, "for best performance.\n")); 1883 if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 1884 } 1885 #endif 1886 /* put together the new matrix */ 1887 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 1888 b = (Mat_SeqAIJ *)fact->data; 1889 b->free_ij = PETSC_TRUE; 1890 PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a)); 1891 b->free_a = PETSC_TRUE; 1892 b->j = bj; 1893 b->i = bi; 1894 b->diag = bdiag; 1895 b->ilen = NULL; 1896 b->imax = NULL; 1897 b->row = isrow; 1898 b->col = iscol; 1899 PetscCall(PetscObjectReference((PetscObject)isrow)); 1900 PetscCall(PetscObjectReference((PetscObject)iscol)); 1901 b->icol = isicol; 1902 1903 PetscCall(PetscMalloc1(n, &b->solve_work)); 1904 /* In b structure: Free imax, ilen, old a, old j. 1905 Allocate bdiag, solve_work, new a, new j */ 1906 b->maxnz = b->nz = bdiag[0] + 1; 1907 1908 fact->info.factor_mallocs = reallocs; 1909 fact->info.fill_ratio_given = f; 1910 fact->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 1911 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1912 if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1913 PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1914 PetscFunctionReturn(PETSC_SUCCESS); 1915 } 1916 1917 #if 0 1918 // unused 1919 static PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1920 { 1921 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1922 IS isicol; 1923 const PetscInt *r, *ic; 1924 PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 1925 PetscInt *bi, *cols, nnz, *cols_lvl; 1926 PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 1927 PetscInt i, levels, diagonal_fill; 1928 PetscBool col_identity, row_identity; 1929 PetscReal f; 1930 PetscInt nlnk, *lnk, *lnk_lvl = NULL; 1931 PetscBT lnkbt; 1932 PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 1933 PetscFreeSpaceList free_space = NULL, current_space = NULL; 1934 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1935 PetscBool missing; 1936 1937 PetscFunctionBegin; 1938 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 1939 PetscCall(MatMissingDiagonal(A, &missing, &i)); 1940 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 1941 1942 f = info->fill; 1943 levels = (PetscInt)info->levels; 1944 diagonal_fill = (PetscInt)info->diagonal_fill; 1945 1946 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1947 1948 PetscCall(ISIdentity(isrow, &row_identity)); 1949 PetscCall(ISIdentity(iscol, &col_identity)); 1950 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1951 PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE)); 1952 1953 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1954 if (a->inode.size_csr) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1955 fact->factortype = MAT_FACTOR_ILU; 1956 (fact)->info.factor_mallocs = 0; 1957 (fact)->info.fill_ratio_given = info->fill; 1958 (fact)->info.fill_ratio_needed = 1.0; 1959 1960 b = (Mat_SeqAIJ *)fact->data; 1961 b->row = isrow; 1962 b->col = iscol; 1963 b->icol = isicol; 1964 PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work)); 1965 PetscCall(PetscObjectReference((PetscObject)isrow)); 1966 PetscCall(PetscObjectReference((PetscObject)iscol)); 1967 PetscFunctionReturn(PETSC_SUCCESS); 1968 } 1969 1970 PetscCall(ISGetIndices(isrow, &r)); 1971 PetscCall(ISGetIndices(isicol, &ic)); 1972 1973 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1974 PetscCall(PetscShmgetAllocateArray(n + 1,sizeof(PetscInt),(void **)&bi)); 1975 PetscCall(PetscMalloc1(n + 1, &bdiag)); 1976 bi[0] = bdiag[0] = 0; 1977 1978 PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 1979 1980 /* create a linked list for storing column indices of the active row */ 1981 nlnk = n + 1; 1982 PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 1983 1984 /* initial FreeSpace size is f*(ai[n]+1) */ 1985 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1986 current_space = free_space; 1987 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 1988 current_space_lvl = free_space_lvl; 1989 1990 for (i = 0; i < n; i++) { 1991 nzi = 0; 1992 /* copy current row into linked list */ 1993 nnz = ai[r[i] + 1] - ai[r[i]]; 1994 PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 1995 cols = aj + ai[r[i]]; 1996 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1997 PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 1998 nzi += nlnk; 1999 2000 /* make sure diagonal entry is included */ 2001 if (diagonal_fill && lnk[i] == -1) { 2002 fm = n; 2003 while (lnk[fm] < i) fm = lnk[fm]; 2004 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 2005 lnk[fm] = i; 2006 lnk_lvl[i] = 0; 2007 nzi++; 2008 dcount++; 2009 } 2010 2011 /* add pivot rows into the active row */ 2012 nzbd = 0; 2013 prow = lnk[n]; 2014 while (prow < i) { 2015 nnz = bdiag[prow]; 2016 cols = bj_ptr[prow] + nnz + 1; 2017 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 2018 nnz = bi[prow + 1] - bi[prow] - nnz - 1; 2019 PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 2020 nzi += nlnk; 2021 prow = lnk[prow]; 2022 nzbd++; 2023 } 2024 bdiag[i] = nzbd; 2025 bi[i + 1] = bi[i] + nzi; 2026 2027 /* if free space is not available, make more free space */ 2028 if (current_space->local_remaining < nzi) { 2029 nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */ 2030 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 2031 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 2032 reallocs++; 2033 } 2034 2035 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2036 PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 2037 bj_ptr[i] = current_space->array; 2038 bjlvl_ptr[i] = current_space_lvl->array; 2039 2040 /* make sure the active row i has diagonal entry */ 2041 PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 2042 2043 current_space->array += nzi; 2044 current_space->local_used += nzi; 2045 current_space->local_remaining -= nzi; 2046 current_space_lvl->array += nzi; 2047 current_space_lvl->local_used += nzi; 2048 current_space_lvl->local_remaining -= nzi; 2049 } 2050 2051 PetscCall(ISRestoreIndices(isrow, &r)); 2052 PetscCall(ISRestoreIndices(isicol, &ic)); 2053 2054 /* destroy list of free space and other temporary arrays */ 2055 PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscInt),(void **)&bj)); 2056 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */ 2057 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 2058 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2059 PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 2060 2061 #if defined(PETSC_USE_INFO) 2062 { 2063 PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 2064 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 2065 PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 2066 PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 2067 PetscCall(PetscInfo(A, "for best performance.\n")); 2068 if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 2069 } 2070 #endif 2071 2072 /* put together the new matrix */ 2073 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 2074 b = (Mat_SeqAIJ *)fact->data; 2075 b->free_ij = PETSC_TRUE; 2076 PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscScalar),(void **)&b->a)); 2077 b->free_a = PETSC_TRUE; 2078 b->j = bj; 2079 b->i = bi; 2080 for (i = 0; i < n; i++) bdiag[i] += bi[i]; 2081 b->diag = bdiag; 2082 b->ilen = NULL; 2083 b->imax = NULL; 2084 b->row = isrow; 2085 b->col = iscol; 2086 PetscCall(PetscObjectReference((PetscObject)isrow)); 2087 PetscCall(PetscObjectReference((PetscObject)iscol)); 2088 b->icol = isicol; 2089 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 2090 /* In b structure: Free imax, ilen, old a, old j. 2091 Allocate bdiag, solve_work, new a, new j */ 2092 b->maxnz = b->nz = bi[n]; 2093 2094 fact->info.factor_mallocs = reallocs; 2095 fact->info.fill_ratio_given = f; 2096 fact->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 2097 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2098 if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2099 PetscFunctionReturn(PETSC_SUCCESS); 2100 } 2101 #endif 2102 2103 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) 2104 { 2105 Mat C = B; 2106 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2107 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 2108 IS ip = b->row, iip = b->icol; 2109 const PetscInt *rip, *riip; 2110 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp; 2111 PetscInt *ai = a->i, *aj = a->j; 2112 PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz; 2113 MatScalar *rtmp, *ba = b->a, *bval, dk, uikdi; 2114 PetscBool perm_identity; 2115 FactorShiftCtx sctx; 2116 PetscReal rs; 2117 const MatScalar *aa, *v; 2118 MatScalar d; 2119 2120 PetscFunctionBegin; 2121 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 2122 /* MatPivotSetUp(): initialize shift context sctx */ 2123 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 2124 2125 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2126 sctx.shift_top = info->zeropivot; 2127 for (i = 0; i < mbs; i++) { 2128 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2129 d = (aa)[a->diag[i]]; 2130 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2131 v = aa + ai[i]; 2132 nz = ai[i + 1] - ai[i]; 2133 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 2134 if (rs > sctx.shift_top) sctx.shift_top = rs; 2135 } 2136 sctx.shift_top *= 1.1; 2137 sctx.nshift_max = 5; 2138 sctx.shift_lo = 0.; 2139 sctx.shift_hi = 1.; 2140 } 2141 2142 PetscCall(ISGetIndices(ip, &rip)); 2143 PetscCall(ISGetIndices(iip, &riip)); 2144 2145 /* allocate working arrays 2146 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2147 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2148 */ 2149 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r)); 2150 2151 do { 2152 sctx.newshift = PETSC_FALSE; 2153 2154 for (i = 0; i < mbs; i++) c2r[i] = mbs; 2155 if (mbs) il[0] = 0; 2156 2157 for (k = 0; k < mbs; k++) { 2158 /* zero rtmp */ 2159 nz = bi[k + 1] - bi[k]; 2160 bjtmp = bj + bi[k]; 2161 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 2162 2163 /* load in initial unfactored row */ 2164 bval = ba + bi[k]; 2165 jmin = ai[rip[k]]; 2166 jmax = ai[rip[k] + 1]; 2167 for (j = jmin; j < jmax; j++) { 2168 col = riip[aj[j]]; 2169 if (col >= k) { /* only take upper triangular entry */ 2170 rtmp[col] = aa[j]; 2171 *bval++ = 0.0; /* for in-place factorization */ 2172 } 2173 } 2174 /* shift the diagonal of the matrix: ZeropivotApply() */ 2175 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2176 2177 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2178 dk = rtmp[k]; 2179 i = c2r[k]; /* first row to be added to k_th row */ 2180 2181 while (i < k) { 2182 nexti = c2r[i]; /* next row to be added to k_th row */ 2183 2184 /* compute multiplier, update diag(k) and U(i,k) */ 2185 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2186 uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */ 2187 dk += uikdi * ba[ili]; /* update diag[k] */ 2188 ba[ili] = uikdi; /* -U(i,k) */ 2189 2190 /* add multiple of row i to k-th row */ 2191 jmin = ili + 1; 2192 jmax = bi[i + 1]; 2193 if (jmin < jmax) { 2194 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 2195 /* update il and c2r for row i */ 2196 il[i] = jmin; 2197 j = bj[jmin]; 2198 c2r[i] = c2r[j]; 2199 c2r[j] = i; 2200 } 2201 i = nexti; 2202 } 2203 2204 /* copy data into U(k,:) */ 2205 rs = 0.0; 2206 jmin = bi[k]; 2207 jmax = bi[k + 1] - 1; 2208 if (jmin < jmax) { 2209 for (j = jmin; j < jmax; j++) { 2210 col = bj[j]; 2211 ba[j] = rtmp[col]; 2212 rs += PetscAbsScalar(ba[j]); 2213 } 2214 /* add the k-th row into il and c2r */ 2215 il[k] = jmin; 2216 i = bj[jmin]; 2217 c2r[k] = c2r[i]; 2218 c2r[i] = k; 2219 } 2220 2221 /* MatPivotCheck() */ 2222 sctx.rs = rs; 2223 sctx.pv = dk; 2224 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 2225 if (sctx.newshift) break; 2226 dk = sctx.pv; 2227 2228 ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */ 2229 } 2230 } while (sctx.newshift); 2231 2232 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 2233 PetscCall(PetscFree3(rtmp, il, c2r)); 2234 PetscCall(ISRestoreIndices(ip, &rip)); 2235 PetscCall(ISRestoreIndices(iip, &riip)); 2236 2237 PetscCall(ISIdentity(ip, &perm_identity)); 2238 if (perm_identity) { 2239 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2240 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2241 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2242 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2243 } else { 2244 B->ops->solve = MatSolve_SeqSBAIJ_1; 2245 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2246 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2247 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2248 } 2249 2250 C->assembled = PETSC_TRUE; 2251 C->preallocated = PETSC_TRUE; 2252 2253 PetscCall(PetscLogFlops(C->rmap->n)); 2254 2255 /* MatPivotView() */ 2256 if (sctx.nshift) { 2257 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2258 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 2259 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2260 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2261 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 2262 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 2263 } 2264 } 2265 PetscFunctionReturn(PETSC_SUCCESS); 2266 } 2267 2268 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) 2269 { 2270 Mat C = B; 2271 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2272 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 2273 IS ip = b->row, iip = b->icol; 2274 const PetscInt *rip, *riip; 2275 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp; 2276 PetscInt *ai = a->i, *aj = a->j; 2277 PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz; 2278 MatScalar *rtmp, *ba = b->a, *bval, dk, uikdi; 2279 const MatScalar *aa, *v; 2280 PetscBool perm_identity; 2281 FactorShiftCtx sctx; 2282 PetscReal rs; 2283 MatScalar d; 2284 2285 PetscFunctionBegin; 2286 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 2287 /* MatPivotSetUp(): initialize shift context sctx */ 2288 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 2289 2290 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2291 sctx.shift_top = info->zeropivot; 2292 for (i = 0; i < mbs; i++) { 2293 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2294 d = (aa)[a->diag[i]]; 2295 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2296 v = aa + ai[i]; 2297 nz = ai[i + 1] - ai[i]; 2298 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 2299 if (rs > sctx.shift_top) sctx.shift_top = rs; 2300 } 2301 sctx.shift_top *= 1.1; 2302 sctx.nshift_max = 5; 2303 sctx.shift_lo = 0.; 2304 sctx.shift_hi = 1.; 2305 } 2306 2307 PetscCall(ISGetIndices(ip, &rip)); 2308 PetscCall(ISGetIndices(iip, &riip)); 2309 2310 /* initialization */ 2311 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl)); 2312 2313 do { 2314 sctx.newshift = PETSC_FALSE; 2315 2316 for (i = 0; i < mbs; i++) jl[i] = mbs; 2317 il[0] = 0; 2318 2319 for (k = 0; k < mbs; k++) { 2320 /* zero rtmp */ 2321 nz = bi[k + 1] - bi[k]; 2322 bjtmp = bj + bi[k]; 2323 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 2324 2325 bval = ba + bi[k]; 2326 /* initialize k-th row by the perm[k]-th row of A */ 2327 jmin = ai[rip[k]]; 2328 jmax = ai[rip[k] + 1]; 2329 for (j = jmin; j < jmax; j++) { 2330 col = riip[aj[j]]; 2331 if (col >= k) { /* only take upper triangular entry */ 2332 rtmp[col] = aa[j]; 2333 *bval++ = 0.0; /* for in-place factorization */ 2334 } 2335 } 2336 /* shift the diagonal of the matrix */ 2337 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2338 2339 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2340 dk = rtmp[k]; 2341 i = jl[k]; /* first row to be added to k_th row */ 2342 2343 while (i < k) { 2344 nexti = jl[i]; /* next row to be added to k_th row */ 2345 2346 /* compute multiplier, update diag(k) and U(i,k) */ 2347 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2348 uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */ 2349 dk += uikdi * ba[ili]; 2350 ba[ili] = uikdi; /* -U(i,k) */ 2351 2352 /* add multiple of row i to k-th row */ 2353 jmin = ili + 1; 2354 jmax = bi[i + 1]; 2355 if (jmin < jmax) { 2356 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 2357 /* update il and jl for row i */ 2358 il[i] = jmin; 2359 j = bj[jmin]; 2360 jl[i] = jl[j]; 2361 jl[j] = i; 2362 } 2363 i = nexti; 2364 } 2365 2366 /* shift the diagonals when zero pivot is detected */ 2367 /* compute rs=sum of abs(off-diagonal) */ 2368 rs = 0.0; 2369 jmin = bi[k] + 1; 2370 nz = bi[k + 1] - jmin; 2371 bcol = bj + jmin; 2372 for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]); 2373 2374 sctx.rs = rs; 2375 sctx.pv = dk; 2376 PetscCall(MatPivotCheck(B, A, info, &sctx, k)); 2377 if (sctx.newshift) break; 2378 dk = sctx.pv; 2379 2380 /* copy data into U(k,:) */ 2381 ba[bi[k]] = 1.0 / dk; /* U(k,k) */ 2382 jmin = bi[k] + 1; 2383 jmax = bi[k + 1]; 2384 if (jmin < jmax) { 2385 for (j = jmin; j < jmax; j++) { 2386 col = bj[j]; 2387 ba[j] = rtmp[col]; 2388 } 2389 /* add the k-th row into il and jl */ 2390 il[k] = jmin; 2391 i = bj[jmin]; 2392 jl[k] = jl[i]; 2393 jl[i] = k; 2394 } 2395 } 2396 } while (sctx.newshift); 2397 2398 PetscCall(PetscFree3(rtmp, il, jl)); 2399 PetscCall(ISRestoreIndices(ip, &rip)); 2400 PetscCall(ISRestoreIndices(iip, &riip)); 2401 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 2402 2403 PetscCall(ISIdentity(ip, &perm_identity)); 2404 if (perm_identity) { 2405 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2406 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2407 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2408 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2409 } else { 2410 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2411 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2412 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2413 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2414 } 2415 2416 C->assembled = PETSC_TRUE; 2417 C->preallocated = PETSC_TRUE; 2418 2419 PetscCall(PetscLogFlops(C->rmap->n)); 2420 if (sctx.nshift) { 2421 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2422 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2423 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2424 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2425 } 2426 } 2427 PetscFunctionReturn(PETSC_SUCCESS); 2428 } 2429 2430 /* 2431 icc() under revised new data structure. 2432 Factored arrays bj and ba are stored as 2433 U(0,:),...,U(i,:),U(n-1,:) 2434 2435 ui=fact->i is an array of size n+1, in which 2436 ui+ 2437 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2438 ui[n]: points to U(n-1,n-1)+1 2439 2440 udiag=fact->diag is an array of size n,in which 2441 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2442 2443 U(i,:) contains udiag[i] as its last entry, i.e., 2444 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2445 */ 2446 2447 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2448 { 2449 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2450 Mat_SeqSBAIJ *b; 2451 PetscBool perm_identity, missing; 2452 PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 2453 const PetscInt *rip, *riip; 2454 PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 2455 PetscInt nlnk, *lnk, *lnk_lvl = NULL, d; 2456 PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 2457 PetscReal fill = info->fill, levels = info->levels; 2458 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2459 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 2460 PetscBT lnkbt; 2461 IS iperm; 2462 2463 PetscFunctionBegin; 2464 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2465 PetscCall(MatMissingDiagonal(A, &missing, &d)); 2466 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 2467 PetscCall(ISIdentity(perm, &perm_identity)); 2468 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2469 2470 PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui)); 2471 PetscCall(PetscMalloc1(am + 1, &udiag)); 2472 ui[0] = 0; 2473 2474 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2475 if (!levels && perm_identity) { 2476 for (i = 0; i < am; i++) { 2477 ncols = ai[i + 1] - a->diag[i]; 2478 ui[i + 1] = ui[i] + ncols; 2479 udiag[i] = ui[i + 1] - 1; /* points to the last entry of U(i,:) */ 2480 } 2481 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2482 cols = uj; 2483 for (i = 0; i < am; i++) { 2484 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2485 ncols = ai[i + 1] - a->diag[i] - 1; 2486 for (j = 0; j < ncols; j++) *cols++ = aj[j]; 2487 *cols++ = i; /* diagonal is located as the last entry of U(i,:) */ 2488 } 2489 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2490 PetscCall(ISGetIndices(iperm, &riip)); 2491 PetscCall(ISGetIndices(perm, &rip)); 2492 2493 /* initialization */ 2494 PetscCall(PetscMalloc1(am + 1, &ajtmp)); 2495 2496 /* jl: linked list for storing indices of the pivot rows 2497 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2498 PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 2499 for (i = 0; i < am; i++) { 2500 jl[i] = am; 2501 il[i] = 0; 2502 } 2503 2504 /* create and initialize a linked list for storing column indices of the active row k */ 2505 nlnk = am + 1; 2506 PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 2507 2508 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2509 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 2510 current_space = free_space; 2511 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl)); 2512 current_space_lvl = free_space_lvl; 2513 2514 for (k = 0; k < am; k++) { /* for each active row k */ 2515 /* initialize lnk by the column indices of row rip[k] of A */ 2516 nzk = 0; 2517 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2518 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2519 ncols_upper = 0; 2520 for (j = 0; j < ncols; j++) { 2521 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2522 if (riip[i] >= k) { /* only take upper triangular entry */ 2523 ajtmp[ncols_upper] = i; 2524 ncols_upper++; 2525 } 2526 } 2527 PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 2528 nzk += nlnk; 2529 2530 /* update lnk by computing fill-in for each pivot row to be merged in */ 2531 prow = jl[k]; /* 1st pivot row */ 2532 2533 while (prow < k) { 2534 nextprow = jl[prow]; 2535 2536 /* merge prow into k-th row */ 2537 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2538 jmax = ui[prow + 1]; 2539 ncols = jmax - jmin; 2540 i = jmin - ui[prow]; 2541 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2542 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2543 j = *(uj - 1); 2544 PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 2545 nzk += nlnk; 2546 2547 /* update il and jl for prow */ 2548 if (jmin < jmax) { 2549 il[prow] = jmin; 2550 j = *cols; 2551 jl[prow] = jl[j]; 2552 jl[j] = prow; 2553 } 2554 prow = nextprow; 2555 } 2556 2557 /* if free space is not available, make more free space */ 2558 if (current_space->local_remaining < nzk) { 2559 i = am - k + 1; /* num of unfactored rows */ 2560 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2561 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2562 PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 2563 reallocs++; 2564 } 2565 2566 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2567 PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 2568 PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 2569 2570 /* add the k-th row into il and jl */ 2571 if (nzk > 1) { 2572 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2573 jl[k] = jl[i]; 2574 jl[i] = k; 2575 il[k] = ui[k] + 1; 2576 } 2577 uj_ptr[k] = current_space->array; 2578 uj_lvl_ptr[k] = current_space_lvl->array; 2579 2580 current_space->array += nzk; 2581 current_space->local_used += nzk; 2582 current_space->local_remaining -= nzk; 2583 2584 current_space_lvl->array += nzk; 2585 current_space_lvl->local_used += nzk; 2586 current_space_lvl->local_remaining -= nzk; 2587 2588 ui[k + 1] = ui[k] + nzk; 2589 } 2590 2591 PetscCall(ISRestoreIndices(perm, &rip)); 2592 PetscCall(ISRestoreIndices(iperm, &riip)); 2593 PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 2594 PetscCall(PetscFree(ajtmp)); 2595 2596 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2597 PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj)); 2598 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 2599 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 2600 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2601 2602 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2603 2604 /* put together the new matrix in MATSEQSBAIJ format */ 2605 b = (Mat_SeqSBAIJ *)fact->data; 2606 b->free_ij = PETSC_TRUE; 2607 PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a)); 2608 b->free_a = PETSC_TRUE; 2609 b->j = uj; 2610 b->i = ui; 2611 b->diag = udiag; 2612 b->free_diag = PETSC_TRUE; 2613 b->ilen = NULL; 2614 b->imax = NULL; 2615 b->row = perm; 2616 b->col = perm; 2617 PetscCall(PetscObjectReference((PetscObject)perm)); 2618 PetscCall(PetscObjectReference((PetscObject)perm)); 2619 b->icol = iperm; 2620 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2621 2622 PetscCall(PetscMalloc1(am, &b->solve_work)); 2623 2624 b->maxnz = b->nz = ui[am]; 2625 2626 fact->info.factor_mallocs = reallocs; 2627 fact->info.fill_ratio_given = fill; 2628 if (ai[am] != 0) { 2629 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2630 fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 2631 } else { 2632 fact->info.fill_ratio_needed = 0.0; 2633 } 2634 #if defined(PETSC_USE_INFO) 2635 if (ai[am] != 0) { 2636 PetscReal af = fact->info.fill_ratio_needed; 2637 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2638 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2639 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2640 } else { 2641 PetscCall(PetscInfo(A, "Empty matrix\n")); 2642 } 2643 #endif 2644 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2645 PetscFunctionReturn(PETSC_SUCCESS); 2646 } 2647 2648 #if 0 2649 // unused 2650 static PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2651 { 2652 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2653 Mat_SeqSBAIJ *b; 2654 PetscBool perm_identity, missing; 2655 PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 2656 const PetscInt *rip, *riip; 2657 PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 2658 PetscInt nlnk, *lnk, *lnk_lvl = NULL, d; 2659 PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 2660 PetscReal fill = info->fill, levels = info->levels; 2661 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2662 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 2663 PetscBT lnkbt; 2664 IS iperm; 2665 2666 PetscFunctionBegin; 2667 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2668 PetscCall(MatMissingDiagonal(A, &missing, &d)); 2669 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 2670 PetscCall(ISIdentity(perm, &perm_identity)); 2671 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2672 2673 PetscCall(PetscShmgetAllocateArray(am + 1,sizeof(PetscInt),(void **)&ui)); 2674 PetscCall(PetscMalloc1(am + 1, &udiag)); 2675 ui[0] = 0; 2676 2677 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2678 if (!levels && perm_identity) { 2679 for (i = 0; i < am; i++) { 2680 ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i]; 2681 udiag[i] = ui[i]; 2682 } 2683 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2684 cols = uj; 2685 for (i = 0; i < am; i++) { 2686 aj = a->j + a->diag[i]; 2687 ncols = ui[i + 1] - ui[i]; 2688 for (j = 0; j < ncols; j++) *cols++ = *aj++; 2689 } 2690 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2691 PetscCall(ISGetIndices(iperm, &riip)); 2692 PetscCall(ISGetIndices(perm, &rip)); 2693 2694 /* initialization */ 2695 PetscCall(PetscMalloc1(am + 1, &ajtmp)); 2696 2697 /* jl: linked list for storing indices of the pivot rows 2698 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2699 PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 2700 for (i = 0; i < am; i++) { 2701 jl[i] = am; 2702 il[i] = 0; 2703 } 2704 2705 /* create and initialize a linked list for storing column indices of the active row k */ 2706 nlnk = am + 1; 2707 PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 2708 2709 /* initial FreeSpace size is fill*(ai[am]+1) */ 2710 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space)); 2711 current_space = free_space; 2712 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl)); 2713 current_space_lvl = free_space_lvl; 2714 2715 for (k = 0; k < am; k++) { /* for each active row k */ 2716 /* initialize lnk by the column indices of row rip[k] of A */ 2717 nzk = 0; 2718 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2719 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2720 ncols_upper = 0; 2721 for (j = 0; j < ncols; j++) { 2722 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2723 if (riip[i] >= k) { /* only take upper triangular entry */ 2724 ajtmp[ncols_upper] = i; 2725 ncols_upper++; 2726 } 2727 } 2728 PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 2729 nzk += nlnk; 2730 2731 /* update lnk by computing fill-in for each pivot row to be merged in */ 2732 prow = jl[k]; /* 1st pivot row */ 2733 2734 while (prow < k) { 2735 nextprow = jl[prow]; 2736 2737 /* merge prow into k-th row */ 2738 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2739 jmax = ui[prow + 1]; 2740 ncols = jmax - jmin; 2741 i = jmin - ui[prow]; 2742 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2743 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2744 j = *(uj - 1); 2745 PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 2746 nzk += nlnk; 2747 2748 /* update il and jl for prow */ 2749 if (jmin < jmax) { 2750 il[prow] = jmin; 2751 j = *cols; 2752 jl[prow] = jl[j]; 2753 jl[j] = prow; 2754 } 2755 prow = nextprow; 2756 } 2757 2758 /* if free space is not available, make more free space */ 2759 if (current_space->local_remaining < nzk) { 2760 i = am - k + 1; /* num of unfactored rows */ 2761 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2762 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2763 PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 2764 reallocs++; 2765 } 2766 2767 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2768 PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 2769 PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 2770 2771 /* add the k-th row into il and jl */ 2772 if (nzk > 1) { 2773 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2774 jl[k] = jl[i]; 2775 jl[i] = k; 2776 il[k] = ui[k] + 1; 2777 } 2778 uj_ptr[k] = current_space->array; 2779 uj_lvl_ptr[k] = current_space_lvl->array; 2780 2781 current_space->array += nzk; 2782 current_space->local_used += nzk; 2783 current_space->local_remaining -= nzk; 2784 2785 current_space_lvl->array += nzk; 2786 current_space_lvl->local_used += nzk; 2787 current_space_lvl->local_remaining -= nzk; 2788 2789 ui[k + 1] = ui[k] + nzk; 2790 } 2791 2792 #if defined(PETSC_USE_INFO) 2793 if (ai[am] != 0) { 2794 PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]); 2795 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2796 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2797 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2798 } else { 2799 PetscCall(PetscInfo(A, "Empty matrix\n")); 2800 } 2801 #endif 2802 2803 PetscCall(ISRestoreIndices(perm, &rip)); 2804 PetscCall(ISRestoreIndices(iperm, &riip)); 2805 PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 2806 PetscCall(PetscFree(ajtmp)); 2807 2808 /* destroy list of free space and other temporary array(s) */ 2809 PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscInt),(void **)&uj)); 2810 PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 2811 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 2812 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2813 2814 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2815 2816 /* put together the new matrix in MATSEQSBAIJ format */ 2817 2818 b = (Mat_SeqSBAIJ *)fact->data; 2819 b->free_ij = PETSC_TRUE; 2820 PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscScalar),(void **)&b->a)); 2821 b->free_a = PETSC_TRUE; 2822 b->j = uj; 2823 b->i = ui; 2824 b->diag = udiag; 2825 b->free_diag = PETSC_TRUE; 2826 b->ilen = NULL; 2827 b->imax = NULL; 2828 b->row = perm; 2829 b->col = perm; 2830 2831 PetscCall(PetscObjectReference((PetscObject)perm)); 2832 PetscCall(PetscObjectReference((PetscObject)perm)); 2833 2834 b->icol = iperm; 2835 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2836 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 2837 b->maxnz = b->nz = ui[am]; 2838 2839 fact->info.factor_mallocs = reallocs; 2840 fact->info.fill_ratio_given = fill; 2841 if (ai[am] != 0) { 2842 fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]); 2843 } else { 2844 fact->info.fill_ratio_needed = 0.0; 2845 } 2846 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2847 PetscFunctionReturn(PETSC_SUCCESS); 2848 } 2849 #endif 2850 2851 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2852 { 2853 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2854 Mat_SeqSBAIJ *b; 2855 PetscBool perm_identity, missing; 2856 PetscReal fill = info->fill; 2857 const PetscInt *rip, *riip; 2858 PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 2859 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 2860 PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag; 2861 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2862 PetscBT lnkbt; 2863 IS iperm; 2864 2865 PetscFunctionBegin; 2866 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2867 PetscCall(MatMissingDiagonal(A, &missing, &i)); 2868 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 2869 2870 /* check whether perm is the identity mapping */ 2871 PetscCall(ISIdentity(perm, &perm_identity)); 2872 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2873 PetscCall(ISGetIndices(iperm, &riip)); 2874 PetscCall(ISGetIndices(perm, &rip)); 2875 2876 /* initialization */ 2877 PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui)); 2878 PetscCall(PetscMalloc1(am + 1, &udiag)); 2879 ui[0] = 0; 2880 2881 /* jl: linked list for storing indices of the pivot rows 2882 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2883 PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 2884 for (i = 0; i < am; i++) { 2885 jl[i] = am; 2886 il[i] = 0; 2887 } 2888 2889 /* create and initialize a linked list for storing column indices of the active row k */ 2890 nlnk = am + 1; 2891 PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 2892 2893 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2894 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 2895 current_space = free_space; 2896 2897 for (k = 0; k < am; k++) { /* for each active row k */ 2898 /* initialize lnk by the column indices of row rip[k] of A */ 2899 nzk = 0; 2900 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2901 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2902 ncols_upper = 0; 2903 for (j = 0; j < ncols; j++) { 2904 i = riip[*(aj + ai[rip[k]] + j)]; 2905 if (i >= k) { /* only take upper triangular entry */ 2906 cols[ncols_upper] = i; 2907 ncols_upper++; 2908 } 2909 } 2910 PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 2911 nzk += nlnk; 2912 2913 /* update lnk by computing fill-in for each pivot row to be merged in */ 2914 prow = jl[k]; /* 1st pivot row */ 2915 2916 while (prow < k) { 2917 nextprow = jl[prow]; 2918 /* merge prow into k-th row */ 2919 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2920 jmax = ui[prow + 1]; 2921 ncols = jmax - jmin; 2922 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2923 PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 2924 nzk += nlnk; 2925 2926 /* update il and jl for prow */ 2927 if (jmin < jmax) { 2928 il[prow] = jmin; 2929 j = *uj_ptr; 2930 jl[prow] = jl[j]; 2931 jl[j] = prow; 2932 } 2933 prow = nextprow; 2934 } 2935 2936 /* if free space is not available, make more free space */ 2937 if (current_space->local_remaining < nzk) { 2938 i = am - k + 1; /* num of unfactored rows */ 2939 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2940 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2941 reallocs++; 2942 } 2943 2944 /* copy data into free space, then initialize lnk */ 2945 PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 2946 2947 /* add the k-th row into il and jl */ 2948 if (nzk > 1) { 2949 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2950 jl[k] = jl[i]; 2951 jl[i] = k; 2952 il[k] = ui[k] + 1; 2953 } 2954 ui_ptr[k] = current_space->array; 2955 2956 current_space->array += nzk; 2957 current_space->local_used += nzk; 2958 current_space->local_remaining -= nzk; 2959 2960 ui[k + 1] = ui[k] + nzk; 2961 } 2962 2963 PetscCall(ISRestoreIndices(perm, &rip)); 2964 PetscCall(ISRestoreIndices(iperm, &riip)); 2965 PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 2966 2967 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2968 PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscInt), (void **)&uj)); 2969 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 2970 PetscCall(PetscLLDestroy(lnk, lnkbt)); 2971 2972 /* put together the new matrix in MATSEQSBAIJ format */ 2973 b = (Mat_SeqSBAIJ *)fact->data; 2974 b->free_ij = PETSC_TRUE; 2975 PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a)); 2976 b->free_a = PETSC_TRUE; 2977 b->j = uj; 2978 b->i = ui; 2979 b->diag = udiag; 2980 b->free_diag = PETSC_TRUE; 2981 b->ilen = NULL; 2982 b->imax = NULL; 2983 b->row = perm; 2984 b->col = perm; 2985 2986 PetscCall(PetscObjectReference((PetscObject)perm)); 2987 PetscCall(PetscObjectReference((PetscObject)perm)); 2988 2989 b->icol = iperm; 2990 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2991 2992 PetscCall(PetscMalloc1(am, &b->solve_work)); 2993 2994 b->maxnz = b->nz = ui[am]; 2995 2996 fact->info.factor_mallocs = reallocs; 2997 fact->info.fill_ratio_given = fill; 2998 if (ai[am] != 0) { 2999 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 3000 fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 3001 } else { 3002 fact->info.fill_ratio_needed = 0.0; 3003 } 3004 #if defined(PETSC_USE_INFO) 3005 if (ai[am] != 0) { 3006 PetscReal af = fact->info.fill_ratio_needed; 3007 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 3008 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 3009 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 3010 } else { 3011 PetscCall(PetscInfo(A, "Empty matrix\n")); 3012 } 3013 #endif 3014 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 3015 PetscFunctionReturn(PETSC_SUCCESS); 3016 } 3017 3018 #if 0 3019 // unused 3020 static PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 3021 { 3022 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3023 Mat_SeqSBAIJ *b; 3024 PetscBool perm_identity, missing; 3025 PetscReal fill = info->fill; 3026 const PetscInt *rip, *riip; 3027 PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 3028 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 3029 PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr; 3030 PetscFreeSpaceList free_space = NULL, current_space = NULL; 3031 PetscBT lnkbt; 3032 IS iperm; 3033 3034 PetscFunctionBegin; 3035 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 3036 PetscCall(MatMissingDiagonal(A, &missing, &i)); 3037 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 3038 3039 /* check whether perm is the identity mapping */ 3040 PetscCall(ISIdentity(perm, &perm_identity)); 3041 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 3042 PetscCall(ISGetIndices(iperm, &riip)); 3043 PetscCall(ISGetIndices(perm, &rip)); 3044 3045 /* initialization */ 3046 PetscCall(PetscShmgetAllocateArray(am + 1,sizeof(PetscInt),(void **)&ui)); 3047 ui[0] = 0; 3048 3049 /* jl: linked list for storing indices of the pivot rows 3050 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 3051 PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 3052 for (i = 0; i < am; i++) { 3053 jl[i] = am; 3054 il[i] = 0; 3055 } 3056 3057 /* create and initialize a linked list for storing column indices of the active row k */ 3058 nlnk = am + 1; 3059 PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 3060 3061 /* initial FreeSpace size is fill*(ai[am]+1) */ 3062 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space)); 3063 current_space = free_space; 3064 3065 for (k = 0; k < am; k++) { /* for each active row k */ 3066 /* initialize lnk by the column indices of row rip[k] of A */ 3067 nzk = 0; 3068 ncols = ai[rip[k] + 1] - ai[rip[k]]; 3069 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 3070 ncols_upper = 0; 3071 for (j = 0; j < ncols; j++) { 3072 i = riip[*(aj + ai[rip[k]] + j)]; 3073 if (i >= k) { /* only take upper triangular entry */ 3074 cols[ncols_upper] = i; 3075 ncols_upper++; 3076 } 3077 } 3078 PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 3079 nzk += nlnk; 3080 3081 /* update lnk by computing fill-in for each pivot row to be merged in */ 3082 prow = jl[k]; /* 1st pivot row */ 3083 3084 while (prow < k) { 3085 nextprow = jl[prow]; 3086 /* merge prow into k-th row */ 3087 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3088 jmax = ui[prow + 1]; 3089 ncols = jmax - jmin; 3090 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3091 PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 3092 nzk += nlnk; 3093 3094 /* update il and jl for prow */ 3095 if (jmin < jmax) { 3096 il[prow] = jmin; 3097 j = *uj_ptr; 3098 jl[prow] = jl[j]; 3099 jl[j] = prow; 3100 } 3101 prow = nextprow; 3102 } 3103 3104 /* if free space is not available, make more free space */ 3105 if (current_space->local_remaining < nzk) { 3106 i = am - k + 1; /* num of unfactored rows */ 3107 i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3108 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 3109 reallocs++; 3110 } 3111 3112 /* copy data into free space, then initialize lnk */ 3113 PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 3114 3115 /* add the k-th row into il and jl */ 3116 if (nzk - 1 > 0) { 3117 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3118 jl[k] = jl[i]; 3119 jl[i] = k; 3120 il[k] = ui[k] + 1; 3121 } 3122 ui_ptr[k] = current_space->array; 3123 3124 current_space->array += nzk; 3125 current_space->local_used += nzk; 3126 current_space->local_remaining -= nzk; 3127 3128 ui[k + 1] = ui[k] + nzk; 3129 } 3130 3131 #if defined(PETSC_USE_INFO) 3132 if (ai[am] != 0) { 3133 PetscReal af = (PetscReal)ui[am] / (PetscReal)ai[am]; 3134 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 3135 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 3136 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 3137 } else { 3138 PetscCall(PetscInfo(A, "Empty matrix\n")); 3139 } 3140 #endif 3141 3142 PetscCall(ISRestoreIndices(perm, &rip)); 3143 PetscCall(ISRestoreIndices(iperm, &riip)); 3144 PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 3145 3146 /* destroy list of free space and other temporary array(s) */ 3147 PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscInt),(void **)&uj)); 3148 PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 3149 PetscCall(PetscLLDestroy(lnk, lnkbt)); 3150 3151 /* put together the new matrix in MATSEQSBAIJ format */ 3152 b = (Mat_SeqSBAIJ *)fact->data; 3153 b->free_ij = PETSC_TRUE; 3154 PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscScalar),(void **)&b->a)); 3155 b->free_a = PETSC_TRUE; 3156 b->j = uj; 3157 b->i = ui; 3158 b->diag = NULL; 3159 b->ilen = NULL; 3160 b->imax = NULL; 3161 b->row = perm; 3162 b->col = perm; 3163 3164 PetscCall(PetscObjectReference((PetscObject)perm)); 3165 PetscCall(PetscObjectReference((PetscObject)perm)); 3166 3167 b->icol = iperm; 3168 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3169 3170 PetscCall(PetscMalloc1(am + 1, &b->solve_work)); 3171 b->maxnz = b->nz = ui[am]; 3172 3173 fact->info.factor_mallocs = reallocs; 3174 fact->info.fill_ratio_given = fill; 3175 if (ai[am] != 0) { 3176 fact->info.fill_ratio_needed = (PetscReal)ui[am] / (PetscReal)ai[am]; 3177 } else { 3178 fact->info.fill_ratio_needed = 0.0; 3179 } 3180 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3181 PetscFunctionReturn(PETSC_SUCCESS); 3182 } 3183 #endif 3184 3185 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx) 3186 { 3187 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3188 PetscInt n = A->rmap->n; 3189 const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi; 3190 PetscScalar *x, sum; 3191 const PetscScalar *b; 3192 const MatScalar *aa, *v; 3193 PetscInt i, nz; 3194 3195 PetscFunctionBegin; 3196 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 3197 3198 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 3199 PetscCall(VecGetArrayRead(bb, &b)); 3200 PetscCall(VecGetArrayWrite(xx, &x)); 3201 3202 /* forward solve the lower triangular */ 3203 x[0] = b[0]; 3204 v = aa; 3205 vi = aj; 3206 for (i = 1; i < n; i++) { 3207 nz = ai[i + 1] - ai[i]; 3208 sum = b[i]; 3209 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 3210 v += nz; 3211 vi += nz; 3212 x[i] = sum; 3213 } 3214 3215 /* backward solve the upper triangular */ 3216 for (i = n - 1; i >= 0; i--) { 3217 v = aa + adiag[i + 1] + 1; 3218 vi = aj + adiag[i + 1] + 1; 3219 nz = adiag[i] - adiag[i + 1] - 1; 3220 sum = x[i]; 3221 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 3222 x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3223 } 3224 3225 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 3226 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 3227 PetscCall(VecRestoreArrayRead(bb, &b)); 3228 PetscCall(VecRestoreArrayWrite(xx, &x)); 3229 PetscFunctionReturn(PETSC_SUCCESS); 3230 } 3231 3232 PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx) 3233 { 3234 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 3235 IS iscol = a->col, isrow = a->row; 3236 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz; 3237 const PetscInt *rout, *cout, *r, *c; 3238 PetscScalar *x, *tmp, sum; 3239 const PetscScalar *b; 3240 const MatScalar *aa, *v; 3241 3242 PetscFunctionBegin; 3243 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 3244 3245 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 3246 PetscCall(VecGetArrayRead(bb, &b)); 3247 PetscCall(VecGetArrayWrite(xx, &x)); 3248 tmp = a->solve_work; 3249 3250 PetscCall(ISGetIndices(isrow, &rout)); 3251 r = rout; 3252 PetscCall(ISGetIndices(iscol, &cout)); 3253 c = cout; 3254 3255 /* forward solve the lower triangular */ 3256 tmp[0] = b[r[0]]; 3257 v = aa; 3258 vi = aj; 3259 for (i = 1; i < n; i++) { 3260 nz = ai[i + 1] - ai[i]; 3261 sum = b[r[i]]; 3262 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 3263 tmp[i] = sum; 3264 v += nz; 3265 vi += nz; 3266 } 3267 3268 /* backward solve the upper triangular */ 3269 for (i = n - 1; i >= 0; i--) { 3270 v = aa + adiag[i + 1] + 1; 3271 vi = aj + adiag[i + 1] + 1; 3272 nz = adiag[i] - adiag[i + 1] - 1; 3273 sum = tmp[i]; 3274 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 3275 x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 3276 } 3277 3278 PetscCall(ISRestoreIndices(isrow, &rout)); 3279 PetscCall(ISRestoreIndices(iscol, &cout)); 3280 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 3281 PetscCall(VecRestoreArrayRead(bb, &b)); 3282 PetscCall(VecRestoreArrayWrite(xx, &x)); 3283 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 3284 PetscFunctionReturn(PETSC_SUCCESS); 3285 } 3286 3287 #if 0 3288 // unused 3289 /* 3290 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3291 */ 3292 static PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact) 3293 { 3294 Mat B = *fact; 3295 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 3296 IS isicol; 3297 const PetscInt *r, *ic; 3298 PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag; 3299 PetscInt *bi, *bj, *bdiag, *bdiag_rev; 3300 PetscInt row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au; 3301 PetscInt nlnk, *lnk; 3302 PetscBT lnkbt; 3303 PetscBool row_identity, icol_identity; 3304 MatScalar *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp; 3305 const PetscInt *ics; 3306 PetscInt j, nz, *pj, *bjtmp, k, ncut, *jtmp; 3307 PetscReal dt = info->dt, shift = info->shiftamount; 3308 PetscInt dtcount = (PetscInt)info->dtcount, nnz_max; 3309 PetscBool missing; 3310 3311 PetscFunctionBegin; 3312 if (dt == (PetscReal)PETSC_DEFAULT) dt = 0.005; 3313 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax); 3314 3315 /* symbolic factorization, can be reused */ 3316 PetscCall(MatMissingDiagonal(A, &missing, &i)); 3317 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 3318 adiag = a->diag; 3319 3320 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 3321 3322 /* bdiag is location of diagonal in factor */ 3323 PetscCall(PetscMalloc1(n + 1, &bdiag)); /* becomes b->diag */ 3324 PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */ 3325 3326 /* allocate row pointers bi */ 3327 PetscCall(PetscShmgetAllocateArray(2 * n + 2,sizeof(PetscInt),(void **)&bi)); 3328 3329 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3330 if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */ 3331 nnz_max = ai[n] + 2 * n * dtcount + 2; 3332 3333 PetscCall(PetscShmgetAllocateArray(nnz_max + 1,sizeof(PetscInt),(void **)&bj)); 3334 PetscCall(PetscShmgetAllocateArray(nnz_max + 1,sizeof(PetscScalar),(void **)&ba)); 3335 3336 /* put together the new matrix */ 3337 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 3338 b = (Mat_SeqAIJ *)B->data; 3339 b->free_a = PETSC_TRUE; 3340 b->free_ij = PETSC_TRUE; 3341 b->a = ba; 3342 b->j = bj; 3343 b->i = bi; 3344 b->diag = bdiag; 3345 b->ilen = NULL; 3346 b->imax = NULL; 3347 b->row = isrow; 3348 b->col = iscol; 3349 PetscCall(PetscObjectReference((PetscObject)isrow)); 3350 PetscCall(PetscObjectReference((PetscObject)iscol)); 3351 b->icol = isicol; 3352 3353 PetscCall(PetscMalloc1(n + 1, &b->solve_work)); 3354 b->maxnz = nnz_max; 3355 3356 B->factortype = MAT_FACTOR_ILUDT; 3357 B->info.factor_mallocs = 0; 3358 B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]); 3359 /* end of symbolic factorization */ 3360 3361 PetscCall(ISGetIndices(isrow, &r)); 3362 PetscCall(ISGetIndices(isicol, &ic)); 3363 ics = ic; 3364 3365 /* linked list for storing column indices of the active row */ 3366 nlnk = n + 1; 3367 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 3368 3369 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3370 PetscCall(PetscMalloc2(n, &im, n, &jtmp)); 3371 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3372 PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp)); 3373 PetscCall(PetscArrayzero(rtmp, n)); 3374 3375 bi[0] = 0; 3376 bdiag[0] = nnz_max - 1; /* location of diag[0] in factor B */ 3377 bdiag_rev[n] = bdiag[0]; 3378 bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */ 3379 for (i = 0; i < n; i++) { 3380 /* copy initial fill into linked list */ 3381 nzi = ai[r[i] + 1] - ai[r[i]]; 3382 PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 3383 nzi_al = adiag[r[i]] - ai[r[i]]; 3384 nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1; 3385 ajtmp = aj + ai[r[i]]; 3386 PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 3387 3388 /* load in initial (unfactored row) */ 3389 aatmp = a->a + ai[r[i]]; 3390 for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++; 3391 3392 /* add pivot rows into linked list */ 3393 row = lnk[n]; 3394 while (row < i) { 3395 nzi_bl = bi[row + 1] - bi[row] + 1; 3396 bjtmp = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */ 3397 PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im)); 3398 nzi += nlnk; 3399 row = lnk[row]; 3400 } 3401 3402 /* copy data from lnk into jtmp, then initialize lnk */ 3403 PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt)); 3404 3405 /* numerical factorization */ 3406 bjtmp = jtmp; 3407 row = *bjtmp++; /* 1st pivot row */ 3408 while (row < i) { 3409 pc = rtmp + row; 3410 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3411 multiplier = (*pc) * (*pv); 3412 *pc = multiplier; 3413 if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 3414 pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */ 3415 pv = ba + bdiag[row + 1] + 1; 3416 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3417 for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3418 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 3419 } 3420 row = *bjtmp++; 3421 } 3422 3423 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3424 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3425 nzi_bl = 0; 3426 j = 0; 3427 while (jtmp[j] < i) { /* Note: jtmp is sorted */ 3428 vtmp[j] = rtmp[jtmp[j]]; 3429 rtmp[jtmp[j]] = 0.0; 3430 nzi_bl++; 3431 j++; 3432 } 3433 nzi_bu = nzi - nzi_bl - 1; 3434 while (j < nzi) { 3435 vtmp[j] = rtmp[jtmp[j]]; 3436 rtmp[jtmp[j]] = 0.0; 3437 j++; 3438 } 3439 3440 bjtmp = bj + bi[i]; 3441 batmp = ba + bi[i]; 3442 /* apply level dropping rule to L part */ 3443 ncut = nzi_al + dtcount; 3444 if (ncut < nzi_bl) { 3445 PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp)); 3446 PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp)); 3447 } else { 3448 ncut = nzi_bl; 3449 } 3450 for (j = 0; j < ncut; j++) { 3451 bjtmp[j] = jtmp[j]; 3452 batmp[j] = vtmp[j]; 3453 } 3454 bi[i + 1] = bi[i] + ncut; 3455 nzi = ncut + 1; 3456 3457 /* apply level dropping rule to U part */ 3458 ncut = nzi_au + dtcount; 3459 if (ncut < nzi_bu) { 3460 PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1)); 3461 PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1)); 3462 } else { 3463 ncut = nzi_bu; 3464 } 3465 nzi += ncut; 3466 3467 /* mark bdiagonal */ 3468 bdiag[i + 1] = bdiag[i] - (ncut + 1); 3469 bdiag_rev[n - i - 1] = bdiag[i + 1]; 3470 bi[2 * n - i] = bi[2 * n - i + 1] - (ncut + 1); 3471 bjtmp = bj + bdiag[i]; 3472 batmp = ba + bdiag[i]; 3473 *bjtmp = i; 3474 *batmp = diag_tmp; /* rtmp[i]; */ 3475 if (*batmp == 0.0) *batmp = dt + shift; 3476 *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */ 3477 3478 bjtmp = bj + bdiag[i + 1] + 1; 3479 batmp = ba + bdiag[i + 1] + 1; 3480 for (k = 0; k < ncut; k++) { 3481 bjtmp[k] = jtmp[nzi_bl + 1 + k]; 3482 batmp[k] = vtmp[nzi_bl + 1 + k]; 3483 } 3484 3485 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3486 } /* for (i=0; i<n; i++) */ 3487 PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]); 3488 3489 PetscCall(ISRestoreIndices(isrow, &r)); 3490 PetscCall(ISRestoreIndices(isicol, &ic)); 3491 3492 PetscCall(PetscLLDestroy(lnk, lnkbt)); 3493 PetscCall(PetscFree2(im, jtmp)); 3494 PetscCall(PetscFree2(rtmp, vtmp)); 3495 PetscCall(PetscFree(bdiag_rev)); 3496 3497 PetscCall(PetscLogFlops(B->cmap->n)); 3498 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3499 3500 PetscCall(ISIdentity(isrow, &row_identity)); 3501 PetscCall(ISIdentity(isicol, &icol_identity)); 3502 if (row_identity && icol_identity) { 3503 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3504 } else { 3505 B->ops->solve = MatSolve_SeqAIJ; 3506 } 3507 3508 B->ops->solveadd = NULL; 3509 B->ops->solvetranspose = NULL; 3510 B->ops->solvetransposeadd = NULL; 3511 B->ops->matsolve = NULL; 3512 B->assembled = PETSC_TRUE; 3513 B->preallocated = PETSC_TRUE; 3514 PetscFunctionReturn(PETSC_SUCCESS); 3515 } 3516 3517 /* a wrapper of MatILUDTFactor_SeqAIJ() */ 3518 /* 3519 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3520 */ 3521 3522 static PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info) 3523 { 3524 PetscFunctionBegin; 3525 PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact)); 3526 PetscFunctionReturn(PETSC_SUCCESS); 3527 } 3528 3529 /* 3530 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3531 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3532 */ 3533 /* 3534 This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3535 */ 3536 3537 static PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info) 3538 { 3539 Mat C = fact; 3540 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 3541 IS isrow = b->row, isicol = b->icol; 3542 const PetscInt *r, *ic, *ics; 3543 PetscInt i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 3544 PetscInt *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj; 3545 MatScalar *rtmp, *pc, multiplier, *v, *pv, *aa = a->a; 3546 PetscReal dt = info->dt, shift = info->shiftamount; 3547 PetscBool row_identity, col_identity; 3548 3549 PetscFunctionBegin; 3550 PetscCall(ISGetIndices(isrow, &r)); 3551 PetscCall(ISGetIndices(isicol, &ic)); 3552 PetscCall(PetscMalloc1(n + 1, &rtmp)); 3553 ics = ic; 3554 3555 for (i = 0; i < n; i++) { 3556 /* initialize rtmp array */ 3557 nzl = bi[i + 1] - bi[i]; /* num of nonzeros in L(i,:) */ 3558 bjtmp = bj + bi[i]; 3559 for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0; 3560 rtmp[i] = 0.0; 3561 nzu = bdiag[i] - bdiag[i + 1]; /* num of nonzeros in U(i,:) */ 3562 bjtmp = bj + bdiag[i + 1] + 1; 3563 for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0; 3564 3565 /* load in initial unfactored row of A */ 3566 nz = ai[r[i] + 1] - ai[r[i]]; 3567 ajtmp = aj + ai[r[i]]; 3568 v = aa + ai[r[i]]; 3569 for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j]; 3570 3571 /* numerical factorization */ 3572 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3573 nzl = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */ 3574 k = 0; 3575 while (k < nzl) { 3576 row = *bjtmp++; 3577 pc = rtmp + row; 3578 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3579 multiplier = (*pc) * (*pv); 3580 *pc = multiplier; 3581 if (PetscAbsScalar(multiplier) > dt) { 3582 pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */ 3583 pv = b->a + bdiag[row + 1] + 1; 3584 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3585 for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3586 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 3587 } 3588 k++; 3589 } 3590 3591 /* finished row so stick it into b->a */ 3592 /* L-part */ 3593 pv = b->a + bi[i]; 3594 pj = bj + bi[i]; 3595 nzl = bi[i + 1] - bi[i]; 3596 for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]]; 3597 3598 /* diagonal: invert diagonal entries for simpler triangular solves */ 3599 if (rtmp[i] == 0.0) rtmp[i] = dt + shift; 3600 b->a[bdiag[i]] = 1.0 / rtmp[i]; 3601 3602 /* U-part */ 3603 pv = b->a + bdiag[i + 1] + 1; 3604 pj = bj + bdiag[i + 1] + 1; 3605 nzu = bdiag[i] - bdiag[i + 1] - 1; 3606 for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]]; 3607 } 3608 3609 PetscCall(PetscFree(rtmp)); 3610 PetscCall(ISRestoreIndices(isicol, &ic)); 3611 PetscCall(ISRestoreIndices(isrow, &r)); 3612 3613 PetscCall(ISIdentity(isrow, &row_identity)); 3614 PetscCall(ISIdentity(isicol, &col_identity)); 3615 if (row_identity && col_identity) { 3616 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3617 } else { 3618 C->ops->solve = MatSolve_SeqAIJ; 3619 } 3620 C->ops->solveadd = NULL; 3621 C->ops->solvetranspose = NULL; 3622 C->ops->solvetransposeadd = NULL; 3623 C->ops->matsolve = NULL; 3624 C->assembled = PETSC_TRUE; 3625 C->preallocated = PETSC_TRUE; 3626 3627 PetscCall(PetscLogFlops(C->cmap->n)); 3628 PetscFunctionReturn(PETSC_SUCCESS); 3629 } 3630 #endif 3631