1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/mat/impls/sbaij/seq/sbaij.h> 3 #include <petscbt.h> 4 #include <../src/mat/utils/freespace.h> 5 6 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type) 7 { 8 PetscFunctionBegin; 9 *type = MATSOLVERPETSC; 10 PetscFunctionReturn(PETSC_SUCCESS); 11 } 12 13 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B) 14 { 15 PetscInt n = A->rmap->n; 16 17 PetscFunctionBegin; 18 #if defined(PETSC_USE_COMPLEX) 19 if ((ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 20 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n")); 21 *B = NULL; 22 PetscFunctionReturn(PETSC_SUCCESS); 23 } 24 #endif 25 26 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 27 PetscCall(MatSetSizes(*B, n, n, n, n)); 28 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 29 PetscCall(MatSetType(*B, MATSEQAIJ)); 30 31 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 32 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 33 34 PetscCall(MatSetBlockSizesFromMats(*B, A, A)); 35 PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU])); 36 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU])); 37 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT])); 38 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 39 PetscCall(MatSetType(*B, MATSEQSBAIJ)); 40 PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL)); 41 42 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 43 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 44 PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY])); 45 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC])); 46 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported"); 47 (*B)->factortype = ftype; 48 49 PetscCall(PetscFree((*B)->solvertype)); 50 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype)); 51 (*B)->canuseordering = PETSC_TRUE; 52 PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc)); 53 PetscFunctionReturn(PETSC_SUCCESS); 54 } 55 56 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 57 { 58 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 59 IS isicol; 60 const PetscInt *r, *ic, *ai = a->i, *aj = a->j, *ajtmp; 61 PetscInt i, n = A->rmap->n; 62 PetscInt *bi, *bj; 63 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 64 PetscReal f; 65 PetscInt nlnk, *lnk, k, **bi_ptr; 66 PetscFreeSpaceList free_space = NULL, current_space = NULL; 67 PetscBT lnkbt; 68 PetscBool diagDense; 69 70 PetscFunctionBegin; 71 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 72 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense)); 73 PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries"); 74 75 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 76 PetscCall(ISGetIndices(isrow, &r)); 77 PetscCall(ISGetIndices(isicol, &ic)); 78 79 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 80 PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi)); 81 PetscCall(PetscMalloc1(n + 1, &bdiag)); 82 bi[0] = bdiag[0] = 0; 83 84 /* linked list for storing column indices of the active row */ 85 nlnk = n + 1; 86 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 87 88 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 89 90 /* initial FreeSpace size is f*(ai[n]+1) */ 91 f = info->fill; 92 if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 93 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 94 current_space = free_space; 95 96 for (i = 0; i < n; i++) { 97 /* copy previous fill into linked list */ 98 nzi = 0; 99 nnz = ai[r[i] + 1] - ai[r[i]]; 100 ajtmp = aj + ai[r[i]]; 101 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 102 nzi += nlnk; 103 104 /* add pivot rows into linked list */ 105 row = lnk[n]; 106 while (row < i) { 107 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 108 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 109 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 110 nzi += nlnk; 111 row = lnk[row]; 112 } 113 bi[i + 1] = bi[i] + nzi; 114 im[i] = nzi; 115 116 /* mark bdiag */ 117 nzbd = 0; 118 nnz = nzi; 119 k = lnk[n]; 120 while (nnz-- && k < i) { 121 nzbd++; 122 k = lnk[k]; 123 } 124 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 125 126 /* if free space is not available, make more free space */ 127 if (current_space->local_remaining < nzi) { 128 /* estimated additional space needed */ 129 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi)); 130 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 131 reallocs++; 132 } 133 134 /* copy data into free space, then initialize lnk */ 135 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 136 137 bi_ptr[i] = current_space->array; 138 current_space->array += nzi; 139 current_space->local_used += nzi; 140 current_space->local_remaining -= nzi; 141 } 142 143 PetscCall(ISRestoreIndices(isrow, &r)); 144 PetscCall(ISRestoreIndices(isicol, &ic)); 145 146 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 147 PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj)); 148 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 149 PetscCall(PetscLLDestroy(lnk, lnkbt)); 150 PetscCall(PetscFree2(bi_ptr, im)); 151 152 /* put together the new matrix */ 153 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 154 b = (Mat_SeqAIJ *)B->data; 155 b->free_ij = PETSC_TRUE; 156 PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a)); 157 b->free_a = PETSC_TRUE; 158 b->j = bj; 159 b->i = bi; 160 b->diag = bdiag; 161 b->ilen = NULL; 162 b->imax = NULL; 163 b->row = isrow; 164 b->col = iscol; 165 PetscCall(PetscObjectReference((PetscObject)isrow)); 166 PetscCall(PetscObjectReference((PetscObject)iscol)); 167 b->icol = isicol; 168 PetscCall(PetscMalloc1(n, &b->solve_work)); 169 170 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 171 b->maxnz = b->nz = bdiag[0] + 1; 172 173 B->factortype = MAT_FACTOR_LU; 174 B->info.factor_mallocs = reallocs; 175 B->info.fill_ratio_given = f; 176 177 if (ai[n]) { 178 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 179 } else { 180 B->info.fill_ratio_needed = 0.0; 181 } 182 #if defined(PETSC_USE_INFO) 183 if (ai[n] != 0) { 184 PetscReal af = B->info.fill_ratio_needed; 185 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 186 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 187 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 188 PetscCall(PetscInfo(A, "for best performance.\n")); 189 } else { 190 PetscCall(PetscInfo(A, "Empty matrix\n")); 191 } 192 #endif 193 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 194 if (a->inode.size_csr) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 195 PetscCall(MatSeqAIJCheckInode_FactorLU(B)); 196 PetscFunctionReturn(PETSC_SUCCESS); 197 } 198 199 /* 200 Trouble in factorization, should we dump the original matrix? 201 */ 202 PetscErrorCode MatFactorDumpMatrix(Mat A) 203 { 204 PetscBool flg = PETSC_FALSE; 205 206 PetscFunctionBegin; 207 PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL)); 208 if (flg) { 209 PetscViewer viewer; 210 char filename[PETSC_MAX_PATH_LEN]; 211 212 PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank)); 213 PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer)); 214 PetscCall(MatView(A, viewer)); 215 PetscCall(PetscViewerDestroy(&viewer)); 216 } 217 PetscFunctionReturn(PETSC_SUCCESS); 218 } 219 220 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) 221 { 222 Mat C = B; 223 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 224 IS isrow = b->row, isicol = b->icol; 225 const PetscInt *r, *ic, *ics; 226 const PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag; 227 PetscInt i, j, k, nz, nzL, row, *pj; 228 const PetscInt *ajtmp, *bjtmp; 229 MatScalar *rtmp, *pc, multiplier, *pv; 230 const MatScalar *aa, *v; 231 MatScalar *ba; 232 PetscBool row_identity, col_identity; 233 FactorShiftCtx sctx; 234 const PetscInt *ddiag; 235 PetscReal rs; 236 MatScalar d; 237 238 PetscFunctionBegin; 239 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 240 PetscCall(MatSeqAIJGetArrayWrite(B, &ba)); 241 /* MatPivotSetUp(): initialize shift context sctx */ 242 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 243 244 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 245 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL)); 246 sctx.shift_top = info->zeropivot; 247 for (i = 0; i < n; i++) { 248 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 249 d = aa[ddiag[i]]; 250 rs = -PetscAbsScalar(d) - PetscRealPart(d); 251 v = aa + ai[i]; 252 nz = ai[i + 1] - ai[i]; 253 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 254 if (rs > sctx.shift_top) sctx.shift_top = rs; 255 } 256 sctx.shift_top *= 1.1; 257 sctx.nshift_max = 5; 258 sctx.shift_lo = 0.; 259 sctx.shift_hi = 1.; 260 } 261 262 PetscCall(ISGetIndices(isrow, &r)); 263 PetscCall(ISGetIndices(isicol, &ic)); 264 PetscCall(PetscMalloc1(n + 1, &rtmp)); 265 ics = ic; 266 267 do { 268 sctx.newshift = PETSC_FALSE; 269 for (i = 0; i < n; i++) { 270 /* zero rtmp */ 271 /* L part */ 272 nz = bi[i + 1] - bi[i]; 273 bjtmp = bj + bi[i]; 274 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 275 276 /* U part */ 277 nz = bdiag[i] - bdiag[i + 1]; 278 bjtmp = bj + bdiag[i + 1] + 1; 279 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 280 281 /* load in initial (unfactored row) */ 282 nz = ai[r[i] + 1] - ai[r[i]]; 283 ajtmp = aj + ai[r[i]]; 284 v = aa + ai[r[i]]; 285 for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 286 /* ZeropivotApply() */ 287 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 288 289 /* elimination */ 290 bjtmp = bj + bi[i]; 291 row = *bjtmp++; 292 nzL = bi[i + 1] - bi[i]; 293 for (k = 0; k < nzL; k++) { 294 pc = rtmp + row; 295 if (*pc != 0.0) { 296 pv = ba + bdiag[row]; 297 multiplier = *pc * (*pv); 298 *pc = multiplier; 299 300 pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ 301 pv = ba + bdiag[row + 1] + 1; 302 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */ 303 304 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 305 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 306 } 307 row = *bjtmp++; 308 } 309 310 /* finished row so stick it into b->a */ 311 rs = 0.0; 312 /* L part */ 313 pv = ba + bi[i]; 314 pj = b->j + bi[i]; 315 nz = bi[i + 1] - bi[i]; 316 for (j = 0; j < nz; j++) { 317 pv[j] = rtmp[pj[j]]; 318 rs += PetscAbsScalar(pv[j]); 319 } 320 321 /* U part */ 322 pv = ba + bdiag[i + 1] + 1; 323 pj = b->j + bdiag[i + 1] + 1; 324 nz = bdiag[i] - bdiag[i + 1] - 1; 325 for (j = 0; j < nz; j++) { 326 pv[j] = rtmp[pj[j]]; 327 rs += PetscAbsScalar(pv[j]); 328 } 329 330 sctx.rs = rs; 331 sctx.pv = rtmp[i]; 332 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 333 if (sctx.newshift) break; /* break for-loop */ 334 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 335 336 /* Mark diagonal and invert diagonal for simpler triangular solves */ 337 pv = ba + bdiag[i]; 338 *pv = 1.0 / rtmp[i]; 339 340 } /* endof for (i=0; i<n; i++) { */ 341 342 /* MatPivotRefine() */ 343 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 344 /* 345 * if no shift in this attempt & shifting & started shifting & can refine, 346 * then try lower shift 347 */ 348 sctx.shift_hi = sctx.shift_fraction; 349 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 350 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 351 sctx.newshift = PETSC_TRUE; 352 sctx.nshift++; 353 } 354 } while (sctx.newshift); 355 356 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 357 PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba)); 358 359 PetscCall(PetscFree(rtmp)); 360 PetscCall(ISRestoreIndices(isicol, &ic)); 361 PetscCall(ISRestoreIndices(isrow, &r)); 362 363 PetscCall(ISIdentity(isrow, &row_identity)); 364 PetscCall(ISIdentity(isicol, &col_identity)); 365 if (b->inode.size_csr) { 366 C->ops->solve = MatSolve_SeqAIJ_Inode; 367 } else if (row_identity && col_identity) { 368 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 369 } else { 370 C->ops->solve = MatSolve_SeqAIJ; 371 } 372 C->ops->solveadd = MatSolveAdd_SeqAIJ; 373 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 374 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 375 C->ops->matsolve = MatMatSolve_SeqAIJ; 376 C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ; 377 C->assembled = PETSC_TRUE; 378 C->preallocated = PETSC_TRUE; 379 380 PetscCall(PetscLogFlops(C->cmap->n)); 381 382 /* MatShiftView(A,info,&sctx) */ 383 if (sctx.nshift) { 384 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 385 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 386 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 387 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 388 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 389 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 390 } 391 } 392 PetscFunctionReturn(PETSC_SUCCESS); 393 } 394 395 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat); 396 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec); 397 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec); 398 399 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) 400 { 401 Mat C = B; 402 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 403 IS isrow = b->row, isicol = b->icol; 404 const PetscInt *r, *ic, *ics; 405 PetscInt nz, row, i, j, n = A->rmap->n, diag; 406 const PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 407 const PetscInt *ajtmp, *bjtmp, *ddiag, *pj; 408 MatScalar *pv, *rtmp, *pc, multiplier, d; 409 const MatScalar *v, *aa; 410 MatScalar *ba; 411 PetscReal rs = 0.0; 412 FactorShiftCtx sctx; 413 PetscBool row_identity, col_identity; 414 415 PetscFunctionBegin; 416 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL)); 417 418 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 419 PetscCall(MatSeqAIJGetArrayWrite(B, &ba)); 420 /* MatPivotSetUp(): initialize shift context sctx */ 421 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 422 423 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 424 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL)); 425 sctx.shift_top = info->zeropivot; 426 for (i = 0; i < n; i++) { 427 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 428 d = aa[ddiag[i]]; 429 rs = -PetscAbsScalar(d) - PetscRealPart(d); 430 v = aa + ai[i]; 431 nz = ai[i + 1] - ai[i]; 432 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 433 if (rs > sctx.shift_top) sctx.shift_top = rs; 434 } 435 sctx.shift_top *= 1.1; 436 sctx.nshift_max = 5; 437 sctx.shift_lo = 0.; 438 sctx.shift_hi = 1.; 439 } 440 441 PetscCall(ISGetIndices(isrow, &r)); 442 PetscCall(ISGetIndices(isicol, &ic)); 443 PetscCall(PetscMalloc1(n + 1, &rtmp)); 444 ics = ic; 445 446 do { 447 sctx.newshift = PETSC_FALSE; 448 for (i = 0; i < n; i++) { 449 nz = bi[i + 1] - bi[i]; 450 bjtmp = bj + bi[i]; 451 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 452 453 /* load in initial (unfactored row) */ 454 nz = ai[r[i] + 1] - ai[r[i]]; 455 ajtmp = aj + ai[r[i]]; 456 v = aa + ai[r[i]]; 457 for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 458 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 459 460 row = *bjtmp++; 461 while (row < i) { 462 pc = rtmp + row; 463 if (*pc != 0.0) { 464 pv = ba + ddiag[row]; 465 pj = b->j + ddiag[row] + 1; 466 multiplier = *pc / *pv++; 467 *pc = multiplier; 468 nz = bi[row + 1] - ddiag[row] - 1; 469 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 470 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 471 } 472 row = *bjtmp++; 473 } 474 /* finished row so stick it into b->a */ 475 pv = ba + bi[i]; 476 pj = b->j + bi[i]; 477 nz = bi[i + 1] - bi[i]; 478 diag = ddiag[i] - bi[i]; 479 rs = 0.0; 480 for (j = 0; j < nz; j++) { 481 pv[j] = rtmp[pj[j]]; 482 rs += PetscAbsScalar(pv[j]); 483 } 484 rs -= PetscAbsScalar(pv[diag]); 485 486 sctx.rs = rs; 487 sctx.pv = pv[diag]; 488 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 489 if (sctx.newshift) break; 490 pv[diag] = sctx.pv; 491 } 492 493 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 494 /* 495 * if no shift in this attempt & shifting & started shifting & can refine, 496 * then try lower shift 497 */ 498 sctx.shift_hi = sctx.shift_fraction; 499 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 500 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 501 sctx.newshift = PETSC_TRUE; 502 sctx.nshift++; 503 } 504 } while (sctx.newshift); 505 506 /* invert diagonal entries for simpler triangular solves */ 507 for (i = 0; i < n; i++) ba[ddiag[i]] = 1.0 / ba[ddiag[i]]; 508 PetscCall(PetscFree(rtmp)); 509 PetscCall(ISRestoreIndices(isicol, &ic)); 510 PetscCall(ISRestoreIndices(isrow, &r)); 511 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 512 PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba)); 513 514 PetscCall(ISIdentity(isrow, &row_identity)); 515 PetscCall(ISIdentity(isicol, &col_identity)); 516 if (row_identity && col_identity) { 517 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 518 } else { 519 C->ops->solve = MatSolve_SeqAIJ_inplace; 520 } 521 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 522 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 523 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 524 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 525 C->ops->matsolvetranspose = NULL; 526 527 C->assembled = PETSC_TRUE; 528 C->preallocated = PETSC_TRUE; 529 530 PetscCall(PetscLogFlops(C->cmap->n)); 531 if (sctx.nshift) { 532 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 533 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 534 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 535 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 536 } 537 } 538 C->ops->solve = MatSolve_SeqAIJ_inplace; 539 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 540 541 PetscCall(MatSeqAIJCheckInode(C)); 542 PetscFunctionReturn(PETSC_SUCCESS); 543 } 544 545 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec); 546 547 /* 548 This routine implements inplace ILU(0) with row or/and column permutations. 549 Input: 550 A - original matrix 551 Output; 552 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 553 a->j (col index) is permuted by the inverse of colperm, then sorted 554 a->a reordered accordingly with a->j 555 a->diag (ptr to diagonal elements) is updated. 556 */ 557 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info) 558 { 559 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 560 IS isrow = a->row, isicol = a->icol; 561 const PetscInt *r, *ic, *ics; 562 PetscInt i, j, n = A->rmap->n, *ai = a->i, *aj = a->j; 563 PetscInt *ajtmp, nz, row; 564 PetscInt nbdiag, *pj; 565 PetscScalar *rtmp, *pc, multiplier, d; 566 MatScalar *pv, *v; 567 PetscReal rs; 568 FactorShiftCtx sctx; 569 MatScalar *aa, *vtmp; 570 PetscInt *diag; 571 572 PetscFunctionBegin; 573 PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address"); 574 575 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, (const PetscInt **)&diag, NULL)); 576 PetscCall(MatSeqAIJGetArray(A, &aa)); 577 /* MatPivotSetUp(): initialize shift context sctx */ 578 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 579 580 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 581 const PetscInt *ddiag; 582 583 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL)); 584 sctx.shift_top = info->zeropivot; 585 for (i = 0; i < n; i++) { 586 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 587 d = aa[ddiag[i]]; 588 rs = -PetscAbsScalar(d) - PetscRealPart(d); 589 vtmp = aa + ai[i]; 590 nz = ai[i + 1] - ai[i]; 591 for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]); 592 if (rs > sctx.shift_top) sctx.shift_top = rs; 593 } 594 sctx.shift_top *= 1.1; 595 sctx.nshift_max = 5; 596 sctx.shift_lo = 0.; 597 sctx.shift_hi = 1.; 598 } 599 600 PetscCall(ISGetIndices(isrow, &r)); 601 PetscCall(ISGetIndices(isicol, &ic)); 602 PetscCall(PetscMalloc1(n + 1, &rtmp)); 603 PetscCall(PetscArrayzero(rtmp, n + 1)); 604 ics = ic; 605 606 #if defined(MV) 607 sctx.shift_top = 0.; 608 sctx.nshift_max = 0; 609 sctx.shift_lo = 0.; 610 sctx.shift_hi = 0.; 611 sctx.shift_fraction = 0.; 612 613 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 614 sctx.shift_top = 0.; 615 for (i = 0; i < n; i++) { 616 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 617 d = aa[diag[i]]; 618 rs = -PetscAbsScalar(d) - PetscRealPart(d); 619 v = aa + ai[i]; 620 nz = ai[i + 1] - ai[i]; 621 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 622 if (rs > sctx.shift_top) sctx.shift_top = rs; 623 } 624 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 625 sctx.shift_top *= 1.1; 626 sctx.nshift_max = 5; 627 sctx.shift_lo = 0.; 628 sctx.shift_hi = 1.; 629 } 630 631 sctx.shift_amount = 0.; 632 sctx.nshift = 0; 633 #endif 634 635 do { 636 sctx.newshift = PETSC_FALSE; 637 for (i = 0; i < n; i++) { 638 /* load in initial unfactored row */ 639 nz = ai[r[i] + 1] - ai[r[i]]; 640 ajtmp = aj + ai[r[i]]; 641 v = aa + ai[r[i]]; 642 /* sort permuted ajtmp and values v accordingly */ 643 for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]]; 644 PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v)); 645 646 diag[r[i]] = ai[r[i]]; 647 for (j = 0; j < nz; j++) { 648 rtmp[ajtmp[j]] = v[j]; 649 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 650 } 651 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 652 653 row = *ajtmp++; 654 while (row < i) { 655 pc = rtmp + row; 656 if (*pc != 0.0) { 657 pv = aa + diag[r[row]]; 658 pj = aj + diag[r[row]] + 1; 659 660 multiplier = *pc / *pv++; 661 *pc = multiplier; 662 nz = ai[r[row] + 1] - diag[r[row]] - 1; 663 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 664 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 665 } 666 row = *ajtmp++; 667 } 668 /* finished row so overwrite it onto aa */ 669 pv = aa + ai[r[i]]; 670 pj = aj + ai[r[i]]; 671 nz = ai[r[i] + 1] - ai[r[i]]; 672 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 673 674 rs = 0.0; 675 for (j = 0; j < nz; j++) { 676 pv[j] = rtmp[pj[j]]; 677 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 678 } 679 680 sctx.rs = rs; 681 sctx.pv = pv[nbdiag]; 682 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 683 if (sctx.newshift) break; 684 pv[nbdiag] = sctx.pv; 685 } 686 687 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 688 /* 689 * if no shift in this attempt & shifting & started shifting & can refine, 690 * then try lower shift 691 */ 692 sctx.shift_hi = sctx.shift_fraction; 693 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 694 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 695 sctx.newshift = PETSC_TRUE; 696 sctx.nshift++; 697 } 698 } while (sctx.newshift); 699 700 /* invert diagonal entries for simpler triangular solves */ 701 for (i = 0; i < n; i++) aa[diag[r[i]]] = 1.0 / aa[diag[r[i]]]; 702 703 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 704 PetscCall(PetscFree(rtmp)); 705 PetscCall(ISRestoreIndices(isicol, &ic)); 706 PetscCall(ISRestoreIndices(isrow, &r)); 707 708 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 709 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 710 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 711 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 712 713 A->assembled = PETSC_TRUE; 714 A->preallocated = PETSC_TRUE; 715 716 PetscCall(PetscLogFlops(A->cmap->n)); 717 if (sctx.nshift) { 718 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 719 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 720 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 721 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 722 } 723 } 724 PetscFunctionReturn(PETSC_SUCCESS); 725 } 726 727 PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info) 728 { 729 Mat C; 730 731 PetscFunctionBegin; 732 PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C)); 733 PetscCall(MatLUFactorSymbolic(C, A, row, col, info)); 734 PetscCall(MatLUFactorNumeric(C, A, info)); 735 736 A->ops->solve = C->ops->solve; 737 A->ops->solvetranspose = C->ops->solvetranspose; 738 739 PetscCall(MatHeaderMerge(A, &C)); 740 PetscFunctionReturn(PETSC_SUCCESS); 741 } 742 743 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) 744 { 745 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 746 IS iscol = a->col, isrow = a->row; 747 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 748 PetscInt nz; 749 const PetscInt *rout, *cout, *r, *c; 750 PetscScalar *x, *tmp, *tmps, sum; 751 const PetscScalar *b; 752 const MatScalar *aa, *v; 753 const PetscInt *adiag; 754 755 PetscFunctionBegin; 756 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 757 758 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 759 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 760 PetscCall(VecGetArrayRead(bb, &b)); 761 PetscCall(VecGetArrayWrite(xx, &x)); 762 tmp = a->solve_work; 763 764 PetscCall(ISGetIndices(isrow, &rout)); 765 r = rout; 766 PetscCall(ISGetIndices(iscol, &cout)); 767 c = cout + (n - 1); 768 769 /* forward solve the lower triangular */ 770 tmp[0] = b[*r++]; 771 tmps = tmp; 772 for (i = 1; i < n; i++) { 773 v = aa + ai[i]; 774 vi = aj + ai[i]; 775 nz = adiag[i] - ai[i]; 776 sum = b[*r++]; 777 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 778 tmp[i] = sum; 779 } 780 781 /* backward solve the upper triangular */ 782 for (i = n - 1; i >= 0; i--) { 783 v = aa + adiag[i] + 1; 784 vi = aj + adiag[i] + 1; 785 nz = ai[i + 1] - adiag[i] - 1; 786 sum = tmp[i]; 787 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 788 x[*c--] = tmp[i] = sum * aa[adiag[i]]; 789 } 790 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 791 PetscCall(ISRestoreIndices(isrow, &rout)); 792 PetscCall(ISRestoreIndices(iscol, &cout)); 793 PetscCall(VecRestoreArrayRead(bb, &b)); 794 PetscCall(VecRestoreArrayWrite(xx, &x)); 795 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 796 PetscFunctionReturn(PETSC_SUCCESS); 797 } 798 799 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X) 800 { 801 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 802 IS iscol = a->col, isrow = a->row; 803 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 804 PetscInt nz, neq, ldb, ldx; 805 const PetscInt *rout, *cout, *r, *c; 806 PetscScalar *x, *tmp = a->solve_work, *tmps, sum; 807 const PetscScalar *b, *aa, *v; 808 PetscBool isdense; 809 const PetscInt *adiag; 810 811 PetscFunctionBegin; 812 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 813 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 814 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 815 if (X != B) { 816 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 817 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 818 } 819 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 820 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 821 PetscCall(MatDenseGetArrayRead(B, &b)); 822 PetscCall(MatDenseGetLDA(B, &ldb)); 823 PetscCall(MatDenseGetArray(X, &x)); 824 PetscCall(MatDenseGetLDA(X, &ldx)); 825 PetscCall(ISGetIndices(isrow, &rout)); 826 r = rout; 827 PetscCall(ISGetIndices(iscol, &cout)); 828 c = cout; 829 for (neq = 0; neq < B->cmap->n; neq++) { 830 /* forward solve the lower triangular */ 831 tmp[0] = b[r[0]]; 832 tmps = tmp; 833 for (i = 1; i < n; i++) { 834 v = aa + ai[i]; 835 vi = aj + ai[i]; 836 nz = adiag[i] - ai[i]; 837 sum = b[r[i]]; 838 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 839 tmp[i] = sum; 840 } 841 /* backward solve the upper triangular */ 842 for (i = n - 1; i >= 0; i--) { 843 v = aa + adiag[i] + 1; 844 vi = aj + adiag[i] + 1; 845 nz = ai[i + 1] - adiag[i] - 1; 846 sum = tmp[i]; 847 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 848 x[c[i]] = tmp[i] = sum * aa[adiag[i]]; 849 } 850 b += ldb; 851 x += ldx; 852 } 853 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 854 PetscCall(ISRestoreIndices(isrow, &rout)); 855 PetscCall(ISRestoreIndices(iscol, &cout)); 856 PetscCall(MatDenseRestoreArrayRead(B, &b)); 857 PetscCall(MatDenseRestoreArray(X, &x)); 858 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 859 PetscFunctionReturn(PETSC_SUCCESS); 860 } 861 862 PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X) 863 { 864 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 865 IS iscol = a->col, isrow = a->row; 866 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 867 const PetscInt *adiag; 868 PetscInt nz, neq, ldb, ldx; 869 const PetscInt *rout, *cout, *r, *c; 870 PetscScalar *x, *tmp = a->solve_work, sum; 871 const PetscScalar *b, *aa, *v; 872 PetscBool isdense; 873 874 PetscFunctionBegin; 875 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 876 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 877 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 878 if (X != B) { 879 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 880 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 881 } 882 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 883 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 884 PetscCall(MatDenseGetArrayRead(B, &b)); 885 PetscCall(MatDenseGetLDA(B, &ldb)); 886 PetscCall(MatDenseGetArray(X, &x)); 887 PetscCall(MatDenseGetLDA(X, &ldx)); 888 PetscCall(ISGetIndices(isrow, &rout)); 889 r = rout; 890 PetscCall(ISGetIndices(iscol, &cout)); 891 c = cout; 892 for (neq = 0; neq < B->cmap->n; neq++) { 893 /* forward solve the lower triangular */ 894 tmp[0] = b[r[0]]; 895 v = aa; 896 vi = aj; 897 for (i = 1; i < n; i++) { 898 nz = ai[i + 1] - ai[i]; 899 sum = b[r[i]]; 900 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 901 tmp[i] = sum; 902 v += nz; 903 vi += nz; 904 } 905 /* backward solve the upper triangular */ 906 for (i = n - 1; i >= 0; i--) { 907 v = aa + adiag[i + 1] + 1; 908 vi = aj + adiag[i + 1] + 1; 909 nz = adiag[i] - adiag[i + 1] - 1; 910 sum = tmp[i]; 911 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 912 x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 913 } 914 b += ldb; 915 x += ldx; 916 } 917 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 918 PetscCall(ISRestoreIndices(isrow, &rout)); 919 PetscCall(ISRestoreIndices(iscol, &cout)); 920 PetscCall(MatDenseRestoreArrayRead(B, &b)); 921 PetscCall(MatDenseRestoreArray(X, &x)); 922 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 923 PetscFunctionReturn(PETSC_SUCCESS); 924 } 925 926 PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X) 927 { 928 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 929 IS iscol = a->col, isrow = a->row; 930 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, j; 931 const PetscInt *adiag = a->diag; 932 PetscInt nz, neq, ldb, ldx; 933 const PetscInt *rout, *cout, *r, *c; 934 PetscScalar *x, *tmp = a->solve_work, s1; 935 const PetscScalar *b, *aa, *v; 936 PetscBool isdense; 937 938 PetscFunctionBegin; 939 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 940 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 941 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 942 if (X != B) { 943 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 944 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 945 } 946 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 947 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 948 PetscCall(MatDenseGetArrayRead(B, &b)); 949 PetscCall(MatDenseGetLDA(B, &ldb)); 950 PetscCall(MatDenseGetArray(X, &x)); 951 PetscCall(MatDenseGetLDA(X, &ldx)); 952 PetscCall(ISGetIndices(isrow, &rout)); 953 r = rout; 954 PetscCall(ISGetIndices(iscol, &cout)); 955 c = cout; 956 for (neq = 0; neq < B->cmap->n; neq++) { 957 /* copy the b into temp work space according to permutation */ 958 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 959 960 /* forward solve the U^T */ 961 for (i = 0; i < n; i++) { 962 v = aa + adiag[i + 1] + 1; 963 vi = aj + adiag[i + 1] + 1; 964 nz = adiag[i] - adiag[i + 1] - 1; 965 s1 = tmp[i]; 966 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 967 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 968 tmp[i] = s1; 969 } 970 971 /* backward solve the L^T */ 972 for (i = n - 1; i >= 0; i--) { 973 v = aa + ai[i]; 974 vi = aj + ai[i]; 975 nz = ai[i + 1] - ai[i]; 976 s1 = tmp[i]; 977 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 978 } 979 980 /* copy tmp into x according to permutation */ 981 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 982 b += ldb; 983 x += ldx; 984 } 985 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 986 PetscCall(ISRestoreIndices(isrow, &rout)); 987 PetscCall(ISRestoreIndices(iscol, &cout)); 988 PetscCall(MatDenseRestoreArrayRead(B, &b)); 989 PetscCall(MatDenseRestoreArray(X, &x)); 990 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 991 PetscFunctionReturn(PETSC_SUCCESS); 992 } 993 994 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx) 995 { 996 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 997 IS iscol = a->col, isrow = a->row; 998 const PetscInt *r, *c, *rout, *cout, *adiag; 999 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 1000 PetscInt nz, row; 1001 PetscScalar *x, *tmp, *tmps, sum; 1002 const PetscScalar *b; 1003 const MatScalar *aa, *v; 1004 1005 PetscFunctionBegin; 1006 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1007 1008 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1009 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1010 PetscCall(VecGetArrayRead(bb, &b)); 1011 PetscCall(VecGetArrayWrite(xx, &x)); 1012 tmp = a->solve_work; 1013 1014 PetscCall(ISGetIndices(isrow, &rout)); 1015 r = rout; 1016 PetscCall(ISGetIndices(iscol, &cout)); 1017 c = cout + (n - 1); 1018 1019 /* forward solve the lower triangular */ 1020 tmp[0] = b[*r++]; 1021 tmps = tmp; 1022 for (row = 1; row < n; row++) { 1023 i = rout[row]; /* permuted row */ 1024 v = aa + ai[i]; 1025 vi = aj + ai[i]; 1026 nz = adiag[i] - ai[i]; 1027 sum = b[*r++]; 1028 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1029 tmp[row] = sum; 1030 } 1031 1032 /* backward solve the upper triangular */ 1033 for (row = n - 1; row >= 0; row--) { 1034 i = rout[row]; /* permuted row */ 1035 v = aa + adiag[i] + 1; 1036 vi = aj + adiag[i] + 1; 1037 nz = ai[i + 1] - adiag[i] - 1; 1038 sum = tmp[row]; 1039 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1040 x[*c--] = tmp[row] = sum * aa[adiag[i]]; 1041 } 1042 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 1043 PetscCall(ISRestoreIndices(isrow, &rout)); 1044 PetscCall(ISRestoreIndices(iscol, &cout)); 1045 PetscCall(VecRestoreArrayRead(bb, &b)); 1046 PetscCall(VecRestoreArrayWrite(xx, &x)); 1047 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1048 PetscFunctionReturn(PETSC_SUCCESS); 1049 } 1050 1051 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1052 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) 1053 { 1054 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1055 PetscInt n = A->rmap->n; 1056 const PetscInt *ai = a->i, *aj = a->j, *adiag; 1057 PetscScalar *x; 1058 const PetscScalar *b; 1059 const MatScalar *aa; 1060 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1061 PetscInt adiag_i, i, nz, ai_i; 1062 const PetscInt *vi; 1063 const MatScalar *v; 1064 PetscScalar sum; 1065 #endif 1066 1067 PetscFunctionBegin; 1068 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1069 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1070 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1071 PetscCall(VecGetArrayRead(bb, &b)); 1072 PetscCall(VecGetArrayWrite(xx, &x)); 1073 1074 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1075 fortransolveaij_(&n, x, ai, aj, adiag, aa, b); 1076 #else 1077 /* forward solve the lower triangular */ 1078 x[0] = b[0]; 1079 for (i = 1; i < n; i++) { 1080 ai_i = ai[i]; 1081 v = aa + ai_i; 1082 vi = aj + ai_i; 1083 nz = adiag[i] - ai_i; 1084 sum = b[i]; 1085 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1086 x[i] = sum; 1087 } 1088 1089 /* backward solve the upper triangular */ 1090 for (i = n - 1; i >= 0; i--) { 1091 adiag_i = adiag[i]; 1092 v = aa + adiag_i + 1; 1093 vi = aj + adiag_i + 1; 1094 nz = ai[i + 1] - adiag_i - 1; 1095 sum = x[i]; 1096 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1097 x[i] = sum * aa[adiag_i]; 1098 } 1099 #endif 1100 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1101 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1102 PetscCall(VecRestoreArrayRead(bb, &b)); 1103 PetscCall(VecRestoreArrayWrite(xx, &x)); 1104 PetscFunctionReturn(PETSC_SUCCESS); 1105 } 1106 1107 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx) 1108 { 1109 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1110 IS iscol = a->col, isrow = a->row; 1111 PetscInt i, n = A->rmap->n, j; 1112 PetscInt nz; 1113 const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag; 1114 PetscScalar *x, *tmp, sum; 1115 const PetscScalar *b; 1116 const MatScalar *aa, *v; 1117 1118 PetscFunctionBegin; 1119 if (yy != xx) PetscCall(VecCopy(yy, xx)); 1120 1121 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1122 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1123 PetscCall(VecGetArrayRead(bb, &b)); 1124 PetscCall(VecGetArray(xx, &x)); 1125 tmp = a->solve_work; 1126 1127 PetscCall(ISGetIndices(isrow, &rout)); 1128 r = rout; 1129 PetscCall(ISGetIndices(iscol, &cout)); 1130 c = cout + (n - 1); 1131 1132 /* forward solve the lower triangular */ 1133 tmp[0] = b[*r++]; 1134 for (i = 1; i < n; i++) { 1135 v = aa + ai[i]; 1136 vi = aj + ai[i]; 1137 nz = adiag[i] - ai[i]; 1138 sum = b[*r++]; 1139 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1140 tmp[i] = sum; 1141 } 1142 1143 /* backward solve the upper triangular */ 1144 for (i = n - 1; i >= 0; i--) { 1145 v = aa + adiag[i] + 1; 1146 vi = aj + adiag[i] + 1; 1147 nz = ai[i + 1] - adiag[i] - 1; 1148 sum = tmp[i]; 1149 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1150 tmp[i] = sum * aa[adiag[i]]; 1151 x[*c--] += tmp[i]; 1152 } 1153 1154 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 1155 PetscCall(ISRestoreIndices(isrow, &rout)); 1156 PetscCall(ISRestoreIndices(iscol, &cout)); 1157 PetscCall(VecRestoreArrayRead(bb, &b)); 1158 PetscCall(VecRestoreArray(xx, &x)); 1159 PetscCall(PetscLogFlops(2.0 * a->nz)); 1160 PetscFunctionReturn(PETSC_SUCCESS); 1161 } 1162 1163 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx) 1164 { 1165 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1166 IS iscol = a->col, isrow = a->row; 1167 PetscInt i, n = A->rmap->n, j; 1168 PetscInt nz; 1169 const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag; 1170 PetscScalar *x, *tmp, sum; 1171 const PetscScalar *b; 1172 const MatScalar *aa, *v; 1173 1174 PetscFunctionBegin; 1175 if (yy != xx) PetscCall(VecCopy(yy, xx)); 1176 1177 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1178 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1179 PetscCall(VecGetArrayRead(bb, &b)); 1180 PetscCall(VecGetArray(xx, &x)); 1181 tmp = a->solve_work; 1182 1183 PetscCall(ISGetIndices(isrow, &rout)); 1184 r = rout; 1185 PetscCall(ISGetIndices(iscol, &cout)); 1186 c = cout; 1187 1188 /* forward solve the lower triangular */ 1189 tmp[0] = b[r[0]]; 1190 v = aa; 1191 vi = aj; 1192 for (i = 1; i < n; i++) { 1193 nz = ai[i + 1] - ai[i]; 1194 sum = b[r[i]]; 1195 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1196 tmp[i] = sum; 1197 v += nz; 1198 vi += nz; 1199 } 1200 1201 /* backward solve the upper triangular */ 1202 v = aa + adiag[n - 1]; 1203 vi = aj + adiag[n - 1]; 1204 for (i = n - 1; i >= 0; i--) { 1205 nz = adiag[i] - adiag[i + 1] - 1; 1206 sum = tmp[i]; 1207 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1208 tmp[i] = sum * v[nz]; 1209 x[c[i]] += tmp[i]; 1210 v += nz + 1; 1211 vi += nz + 1; 1212 } 1213 1214 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 1215 PetscCall(ISRestoreIndices(isrow, &rout)); 1216 PetscCall(ISRestoreIndices(iscol, &cout)); 1217 PetscCall(VecRestoreArrayRead(bb, &b)); 1218 PetscCall(VecRestoreArray(xx, &x)); 1219 PetscCall(PetscLogFlops(2.0 * a->nz)); 1220 PetscFunctionReturn(PETSC_SUCCESS); 1221 } 1222 1223 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) 1224 { 1225 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1226 IS iscol = a->col, isrow = a->row; 1227 const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 1228 PetscInt i, n = A->rmap->n, j; 1229 PetscInt nz; 1230 PetscScalar *x, *tmp, s1; 1231 const MatScalar *aa, *v; 1232 const PetscScalar *b; 1233 1234 PetscFunctionBegin; 1235 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1236 PetscCall(VecGetArrayRead(bb, &b)); 1237 PetscCall(VecGetArrayWrite(xx, &x)); 1238 tmp = a->solve_work; 1239 1240 PetscCall(ISGetIndices(isrow, &rout)); 1241 r = rout; 1242 PetscCall(ISGetIndices(iscol, &cout)); 1243 c = cout; 1244 1245 /* copy the b into temp work space according to permutation */ 1246 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1247 1248 /* forward solve the U^T */ 1249 for (i = 0; i < n; i++) { 1250 v = aa + diag[i]; 1251 vi = aj + diag[i] + 1; 1252 nz = ai[i + 1] - diag[i] - 1; 1253 s1 = tmp[i]; 1254 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1255 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1256 tmp[i] = s1; 1257 } 1258 1259 /* backward solve the L^T */ 1260 for (i = n - 1; i >= 0; i--) { 1261 v = aa + diag[i] - 1; 1262 vi = aj + diag[i] - 1; 1263 nz = diag[i] - ai[i]; 1264 s1 = tmp[i]; 1265 for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1266 } 1267 1268 /* copy tmp into x according to permutation */ 1269 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1270 1271 PetscCall(ISRestoreIndices(isrow, &rout)); 1272 PetscCall(ISRestoreIndices(iscol, &cout)); 1273 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 1274 PetscCall(VecRestoreArrayRead(bb, &b)); 1275 PetscCall(VecRestoreArrayWrite(xx, &x)); 1276 1277 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1278 PetscFunctionReturn(PETSC_SUCCESS); 1279 } 1280 1281 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx) 1282 { 1283 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1284 IS iscol = a->col, isrow = a->row; 1285 const PetscInt *rout, *cout, *r, *c, *adiag, *ai = a->i, *aj = a->j, *vi; 1286 PetscInt i, n = A->rmap->n, j; 1287 PetscInt nz; 1288 PetscScalar *x, *tmp, s1; 1289 const MatScalar *aa, *v; 1290 const PetscScalar *b; 1291 1292 PetscFunctionBegin; 1293 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1294 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1295 PetscCall(VecGetArrayRead(bb, &b)); 1296 PetscCall(VecGetArrayWrite(xx, &x)); 1297 tmp = a->solve_work; 1298 1299 PetscCall(ISGetIndices(isrow, &rout)); 1300 r = rout; 1301 PetscCall(ISGetIndices(iscol, &cout)); 1302 c = cout; 1303 1304 /* copy the b into temp work space according to permutation */ 1305 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1306 1307 /* forward solve the U^T */ 1308 for (i = 0; i < n; i++) { 1309 v = aa + adiag[i + 1] + 1; 1310 vi = aj + adiag[i + 1] + 1; 1311 nz = adiag[i] - adiag[i + 1] - 1; 1312 s1 = tmp[i]; 1313 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1314 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1315 tmp[i] = s1; 1316 } 1317 1318 /* backward solve the L^T */ 1319 for (i = n - 1; i >= 0; i--) { 1320 v = aa + ai[i]; 1321 vi = aj + ai[i]; 1322 nz = ai[i + 1] - ai[i]; 1323 s1 = tmp[i]; 1324 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1325 } 1326 1327 /* copy tmp into x according to permutation */ 1328 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1329 1330 PetscCall(ISRestoreIndices(isrow, &rout)); 1331 PetscCall(ISRestoreIndices(iscol, &cout)); 1332 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 1333 PetscCall(VecRestoreArrayRead(bb, &b)); 1334 PetscCall(VecRestoreArrayWrite(xx, &x)); 1335 1336 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1337 PetscFunctionReturn(PETSC_SUCCESS); 1338 } 1339 1340 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx) 1341 { 1342 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1343 IS iscol = a->col, isrow = a->row; 1344 const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 1345 PetscInt i, n = A->rmap->n, j; 1346 PetscInt nz; 1347 PetscScalar *x, *tmp, s1; 1348 const MatScalar *aa, *v; 1349 const PetscScalar *b; 1350 1351 PetscFunctionBegin; 1352 if (zz != xx) PetscCall(VecCopy(zz, xx)); 1353 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1354 PetscCall(VecGetArrayRead(bb, &b)); 1355 PetscCall(VecGetArray(xx, &x)); 1356 tmp = a->solve_work; 1357 1358 PetscCall(ISGetIndices(isrow, &rout)); 1359 r = rout; 1360 PetscCall(ISGetIndices(iscol, &cout)); 1361 c = cout; 1362 1363 /* copy the b into temp work space according to permutation */ 1364 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1365 1366 /* forward solve the U^T */ 1367 for (i = 0; i < n; i++) { 1368 v = aa + diag[i]; 1369 vi = aj + diag[i] + 1; 1370 nz = ai[i + 1] - diag[i] - 1; 1371 s1 = tmp[i]; 1372 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1373 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1374 tmp[i] = s1; 1375 } 1376 1377 /* backward solve the L^T */ 1378 for (i = n - 1; i >= 0; i--) { 1379 v = aa + diag[i] - 1; 1380 vi = aj + diag[i] - 1; 1381 nz = diag[i] - ai[i]; 1382 s1 = tmp[i]; 1383 for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1384 } 1385 1386 /* copy tmp into x according to permutation */ 1387 for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1388 1389 PetscCall(ISRestoreIndices(isrow, &rout)); 1390 PetscCall(ISRestoreIndices(iscol, &cout)); 1391 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 1392 PetscCall(VecRestoreArrayRead(bb, &b)); 1393 PetscCall(VecRestoreArray(xx, &x)); 1394 1395 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1396 PetscFunctionReturn(PETSC_SUCCESS); 1397 } 1398 1399 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx) 1400 { 1401 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1402 IS iscol = a->col, isrow = a->row; 1403 const PetscInt *rout, *cout, *r, *c, *adiag, *ai = a->i, *aj = a->j, *vi; 1404 PetscInt i, n = A->rmap->n, j; 1405 PetscInt nz; 1406 PetscScalar *x, *tmp, s1; 1407 const MatScalar *aa, *v; 1408 const PetscScalar *b; 1409 1410 PetscFunctionBegin; 1411 if (zz != xx) PetscCall(VecCopy(zz, xx)); 1412 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1413 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1414 PetscCall(VecGetArrayRead(bb, &b)); 1415 PetscCall(VecGetArray(xx, &x)); 1416 tmp = a->solve_work; 1417 1418 PetscCall(ISGetIndices(isrow, &rout)); 1419 r = rout; 1420 PetscCall(ISGetIndices(iscol, &cout)); 1421 c = cout; 1422 1423 /* copy the b into temp work space according to permutation */ 1424 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1425 1426 /* forward solve the U^T */ 1427 for (i = 0; i < n; i++) { 1428 v = aa + adiag[i + 1] + 1; 1429 vi = aj + adiag[i + 1] + 1; 1430 nz = adiag[i] - adiag[i + 1] - 1; 1431 s1 = tmp[i]; 1432 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1433 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1434 tmp[i] = s1; 1435 } 1436 1437 /* backward solve the L^T */ 1438 for (i = n - 1; i >= 0; i--) { 1439 v = aa + ai[i]; 1440 vi = aj + ai[i]; 1441 nz = ai[i + 1] - ai[i]; 1442 s1 = tmp[i]; 1443 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1444 } 1445 1446 /* copy tmp into x according to permutation */ 1447 for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1448 1449 PetscCall(ISRestoreIndices(isrow, &rout)); 1450 PetscCall(ISRestoreIndices(iscol, &cout)); 1451 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 1452 PetscCall(VecRestoreArrayRead(bb, &b)); 1453 PetscCall(VecRestoreArray(xx, &x)); 1454 1455 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1456 PetscFunctionReturn(PETSC_SUCCESS); 1457 } 1458 1459 /* 1460 ilu() under revised new data structure. 1461 Factored arrays bj and ba are stored as 1462 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1463 1464 bi=fact->i is an array of size n+1, in which 1465 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1466 bi[n]: points to L(n-1,n-1)+1 1467 1468 bdiag=fact->diag is an array of size n+1,in which 1469 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1470 bdiag[n]: points to entry of U(n-1,0)-1 1471 1472 U(i,:) contains bdiag[i] as its last entry, i.e., 1473 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1474 */ 1475 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1476 { 1477 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1478 const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag; 1479 PetscInt i, j, k = 0, nz, *bi, *bj, *bdiag; 1480 IS isicol; 1481 1482 PetscFunctionBegin; 1483 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1484 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1485 PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE)); 1486 b = (Mat_SeqAIJ *)fact->data; 1487 1488 /* allocate matrix arrays for new data structure */ 1489 PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscScalar), (void **)&b->a)); 1490 PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscInt), (void **)&b->j)); 1491 PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&b->i)); 1492 if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n])); 1493 b->free_a = PETSC_TRUE; 1494 b->free_ij = PETSC_TRUE; 1495 1496 if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag)); 1497 bdiag = b->diag; 1498 1499 /* set bi and bj with new data structure */ 1500 bi = b->i; 1501 bj = b->j; 1502 1503 /* L part */ 1504 bi[0] = 0; 1505 for (i = 0; i < n; i++) { 1506 nz = adiag[i] - ai[i]; 1507 bi[i + 1] = bi[i] + nz; 1508 aj = a->j + ai[i]; 1509 for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1510 } 1511 1512 /* U part */ 1513 bdiag[n] = bi[n] - 1; 1514 for (i = n - 1; i >= 0; i--) { 1515 nz = ai[i + 1] - adiag[i] - 1; 1516 aj = a->j + adiag[i] + 1; 1517 for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1518 /* diag[i] */ 1519 bj[k++] = i; 1520 bdiag[i] = bdiag[i + 1] + nz + 1; 1521 } 1522 1523 fact->factortype = MAT_FACTOR_ILU; 1524 fact->info.factor_mallocs = 0; 1525 fact->info.fill_ratio_given = info->fill; 1526 fact->info.fill_ratio_needed = 1.0; 1527 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1528 PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1529 1530 b = (Mat_SeqAIJ *)fact->data; 1531 b->row = isrow; 1532 b->col = iscol; 1533 b->icol = isicol; 1534 PetscCall(PetscMalloc1(fact->rmap->n, &b->solve_work)); 1535 PetscCall(PetscObjectReference((PetscObject)isrow)); 1536 PetscCall(PetscObjectReference((PetscObject)iscol)); 1537 PetscFunctionReturn(PETSC_SUCCESS); 1538 } 1539 1540 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1541 { 1542 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1543 IS isicol; 1544 const PetscInt *r, *ic; 1545 PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 1546 PetscInt *bi, *cols, nnz, *cols_lvl; 1547 PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 1548 PetscInt i, levels, diagonal_fill; 1549 PetscBool col_identity, row_identity; 1550 PetscReal f; 1551 PetscInt nlnk, *lnk, *lnk_lvl = NULL; 1552 PetscBT lnkbt; 1553 PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 1554 PetscFreeSpaceList free_space = NULL, current_space = NULL; 1555 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1556 PetscBool diagDense; 1557 1558 PetscFunctionBegin; 1559 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 1560 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense)); 1561 PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries"); 1562 1563 levels = (PetscInt)info->levels; 1564 PetscCall(ISIdentity(isrow, &row_identity)); 1565 PetscCall(ISIdentity(iscol, &col_identity)); 1566 if (!levels && row_identity && col_identity) { 1567 /* special case: ilu(0) with natural ordering */ 1568 PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info)); 1569 if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1570 PetscFunctionReturn(PETSC_SUCCESS); 1571 } 1572 1573 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1574 PetscCall(ISGetIndices(isrow, &r)); 1575 PetscCall(ISGetIndices(isicol, &ic)); 1576 1577 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1578 PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi)); 1579 PetscCall(PetscMalloc1(n + 1, &bdiag)); 1580 bi[0] = bdiag[0] = 0; 1581 PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 1582 1583 /* create a linked list for storing column indices of the active row */ 1584 nlnk = n + 1; 1585 PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 1586 1587 /* initial FreeSpace size is f*(ai[n]+1) */ 1588 f = info->fill; 1589 diagonal_fill = (PetscInt)info->diagonal_fill; 1590 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1591 current_space = free_space; 1592 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 1593 current_space_lvl = free_space_lvl; 1594 for (i = 0; i < n; i++) { 1595 nzi = 0; 1596 /* copy current row into linked list */ 1597 nnz = ai[r[i] + 1] - ai[r[i]]; 1598 PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 1599 cols = aj + ai[r[i]]; 1600 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1601 PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 1602 nzi += nlnk; 1603 1604 /* make sure diagonal entry is included */ 1605 if (diagonal_fill && lnk[i] == -1) { 1606 fm = n; 1607 while (lnk[fm] < i) fm = lnk[fm]; 1608 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1609 lnk[fm] = i; 1610 lnk_lvl[i] = 0; 1611 nzi++; 1612 dcount++; 1613 } 1614 1615 /* add pivot rows into the active row */ 1616 nzbd = 0; 1617 prow = lnk[n]; 1618 while (prow < i) { 1619 nnz = bdiag[prow]; 1620 cols = bj_ptr[prow] + nnz + 1; 1621 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1622 nnz = bi[prow + 1] - bi[prow] - nnz - 1; 1623 PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 1624 nzi += nlnk; 1625 prow = lnk[prow]; 1626 nzbd++; 1627 } 1628 bdiag[i] = nzbd; 1629 bi[i + 1] = bi[i] + nzi; 1630 /* if free space is not available, make more free space */ 1631 if (current_space->local_remaining < nzi) { 1632 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */ 1633 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 1634 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 1635 reallocs++; 1636 } 1637 1638 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1639 PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 1640 bj_ptr[i] = current_space->array; 1641 bjlvl_ptr[i] = current_space_lvl->array; 1642 1643 /* make sure the active row i has diagonal entry */ 1644 PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 1645 1646 current_space->array += nzi; 1647 current_space->local_used += nzi; 1648 current_space->local_remaining -= nzi; 1649 current_space_lvl->array += nzi; 1650 current_space_lvl->local_used += nzi; 1651 current_space_lvl->local_remaining -= nzi; 1652 } 1653 1654 PetscCall(ISRestoreIndices(isrow, &r)); 1655 PetscCall(ISRestoreIndices(isicol, &ic)); 1656 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1657 PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj)); 1658 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 1659 1660 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 1661 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 1662 PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 1663 1664 #if defined(PETSC_USE_INFO) 1665 { 1666 PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 1667 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 1668 PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 1669 PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 1670 PetscCall(PetscInfo(A, "for best performance.\n")); 1671 if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 1672 } 1673 #endif 1674 /* put together the new matrix */ 1675 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 1676 b = (Mat_SeqAIJ *)fact->data; 1677 b->free_ij = PETSC_TRUE; 1678 PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a)); 1679 b->free_a = PETSC_TRUE; 1680 b->j = bj; 1681 b->i = bi; 1682 b->diag = bdiag; 1683 b->ilen = NULL; 1684 b->imax = NULL; 1685 b->row = isrow; 1686 b->col = iscol; 1687 PetscCall(PetscObjectReference((PetscObject)isrow)); 1688 PetscCall(PetscObjectReference((PetscObject)iscol)); 1689 b->icol = isicol; 1690 1691 PetscCall(PetscMalloc1(n, &b->solve_work)); 1692 /* In b structure: Free imax, ilen, old a, old j. 1693 Allocate bdiag, solve_work, new a, new j */ 1694 b->maxnz = b->nz = bdiag[0] + 1; 1695 1696 fact->info.factor_mallocs = reallocs; 1697 fact->info.fill_ratio_given = f; 1698 fact->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 1699 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1700 if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1701 PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1702 PetscFunctionReturn(PETSC_SUCCESS); 1703 } 1704 1705 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) 1706 { 1707 Mat C = B; 1708 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1709 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 1710 IS ip = b->row, iip = b->icol; 1711 const PetscInt *rip, *riip; 1712 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp; 1713 PetscInt *ai = a->i, *aj = a->j; 1714 PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz; 1715 MatScalar *rtmp, *ba = b->a, *bval, dk, uikdi; 1716 PetscBool perm_identity; 1717 FactorShiftCtx sctx; 1718 PetscReal rs; 1719 const MatScalar *aa, *v; 1720 MatScalar d; 1721 const PetscInt *adiag; 1722 1723 PetscFunctionBegin; 1724 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1725 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1726 /* MatPivotSetUp(): initialize shift context sctx */ 1727 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 1728 1729 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 1730 sctx.shift_top = info->zeropivot; 1731 for (i = 0; i < mbs; i++) { 1732 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 1733 d = aa[adiag[i]]; 1734 rs = -PetscAbsScalar(d) - PetscRealPart(d); 1735 v = aa + ai[i]; 1736 nz = ai[i + 1] - ai[i]; 1737 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 1738 if (rs > sctx.shift_top) sctx.shift_top = rs; 1739 } 1740 sctx.shift_top *= 1.1; 1741 sctx.nshift_max = 5; 1742 sctx.shift_lo = 0.; 1743 sctx.shift_hi = 1.; 1744 } 1745 1746 PetscCall(ISGetIndices(ip, &rip)); 1747 PetscCall(ISGetIndices(iip, &riip)); 1748 1749 /* allocate working arrays 1750 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 1751 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 1752 */ 1753 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r)); 1754 1755 do { 1756 sctx.newshift = PETSC_FALSE; 1757 1758 for (i = 0; i < mbs; i++) c2r[i] = mbs; 1759 if (mbs) il[0] = 0; 1760 1761 for (k = 0; k < mbs; k++) { 1762 /* zero rtmp */ 1763 nz = bi[k + 1] - bi[k]; 1764 bjtmp = bj + bi[k]; 1765 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 1766 1767 /* load in initial unfactored row */ 1768 bval = ba + bi[k]; 1769 jmin = ai[rip[k]]; 1770 jmax = ai[rip[k] + 1]; 1771 for (j = jmin; j < jmax; j++) { 1772 col = riip[aj[j]]; 1773 if (col >= k) { /* only take upper triangular entry */ 1774 rtmp[col] = aa[j]; 1775 *bval++ = 0.0; /* for in-place factorization */ 1776 } 1777 } 1778 /* shift the diagonal of the matrix: ZeropivotApply() */ 1779 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 1780 1781 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1782 dk = rtmp[k]; 1783 i = c2r[k]; /* first row to be added to k_th row */ 1784 1785 while (i < k) { 1786 nexti = c2r[i]; /* next row to be added to k_th row */ 1787 1788 /* compute multiplier, update diag(k) and U(i,k) */ 1789 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1790 uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */ 1791 dk += uikdi * ba[ili]; /* update diag[k] */ 1792 ba[ili] = uikdi; /* -U(i,k) */ 1793 1794 /* add multiple of row i to k-th row */ 1795 jmin = ili + 1; 1796 jmax = bi[i + 1]; 1797 if (jmin < jmax) { 1798 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 1799 /* update il and c2r for row i */ 1800 il[i] = jmin; 1801 j = bj[jmin]; 1802 c2r[i] = c2r[j]; 1803 c2r[j] = i; 1804 } 1805 i = nexti; 1806 } 1807 1808 /* copy data into U(k,:) */ 1809 rs = 0.0; 1810 jmin = bi[k]; 1811 jmax = bi[k + 1] - 1; 1812 if (jmin < jmax) { 1813 for (j = jmin; j < jmax; j++) { 1814 col = bj[j]; 1815 ba[j] = rtmp[col]; 1816 rs += PetscAbsScalar(ba[j]); 1817 } 1818 /* add the k-th row into il and c2r */ 1819 il[k] = jmin; 1820 i = bj[jmin]; 1821 c2r[k] = c2r[i]; 1822 c2r[i] = k; 1823 } 1824 1825 /* MatPivotCheck() */ 1826 sctx.rs = rs; 1827 sctx.pv = dk; 1828 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 1829 if (sctx.newshift) break; 1830 dk = sctx.pv; 1831 1832 ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */ 1833 } 1834 } while (sctx.newshift); 1835 1836 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 1837 PetscCall(PetscFree3(rtmp, il, c2r)); 1838 PetscCall(ISRestoreIndices(ip, &rip)); 1839 PetscCall(ISRestoreIndices(iip, &riip)); 1840 1841 PetscCall(ISIdentity(ip, &perm_identity)); 1842 if (perm_identity) { 1843 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1844 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1845 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1846 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1847 } else { 1848 B->ops->solve = MatSolve_SeqSBAIJ_1; 1849 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1850 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 1851 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 1852 } 1853 1854 C->assembled = PETSC_TRUE; 1855 C->preallocated = PETSC_TRUE; 1856 1857 PetscCall(PetscLogFlops(C->rmap->n)); 1858 1859 /* MatPivotView() */ 1860 if (sctx.nshift) { 1861 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 1862 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 1863 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 1864 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1865 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 1866 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 1867 } 1868 } 1869 PetscFunctionReturn(PETSC_SUCCESS); 1870 } 1871 1872 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) 1873 { 1874 Mat C = B; 1875 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1876 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 1877 IS ip = b->row, iip = b->icol; 1878 const PetscInt *rip, *riip; 1879 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp; 1880 PetscInt *ai = a->i, *aj = a->j; 1881 PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz; 1882 MatScalar *rtmp, *ba = b->a, *bval, dk, uikdi; 1883 const MatScalar *aa, *v; 1884 PetscBool perm_identity; 1885 FactorShiftCtx sctx; 1886 PetscReal rs; 1887 MatScalar d; 1888 const PetscInt *adiag; 1889 1890 PetscFunctionBegin; 1891 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1892 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1893 /* MatPivotSetUp(): initialize shift context sctx */ 1894 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 1895 1896 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 1897 sctx.shift_top = info->zeropivot; 1898 for (i = 0; i < mbs; i++) { 1899 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 1900 d = aa[adiag[i]]; 1901 rs = -PetscAbsScalar(d) - PetscRealPart(d); 1902 v = aa + ai[i]; 1903 nz = ai[i + 1] - ai[i]; 1904 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 1905 if (rs > sctx.shift_top) sctx.shift_top = rs; 1906 } 1907 sctx.shift_top *= 1.1; 1908 sctx.nshift_max = 5; 1909 sctx.shift_lo = 0.; 1910 sctx.shift_hi = 1.; 1911 } 1912 1913 PetscCall(ISGetIndices(ip, &rip)); 1914 PetscCall(ISGetIndices(iip, &riip)); 1915 1916 /* initialization */ 1917 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl)); 1918 1919 do { 1920 sctx.newshift = PETSC_FALSE; 1921 1922 for (i = 0; i < mbs; i++) jl[i] = mbs; 1923 il[0] = 0; 1924 1925 for (k = 0; k < mbs; k++) { 1926 /* zero rtmp */ 1927 nz = bi[k + 1] - bi[k]; 1928 bjtmp = bj + bi[k]; 1929 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 1930 1931 bval = ba + bi[k]; 1932 /* initialize k-th row by the perm[k]-th row of A */ 1933 jmin = ai[rip[k]]; 1934 jmax = ai[rip[k] + 1]; 1935 for (j = jmin; j < jmax; j++) { 1936 col = riip[aj[j]]; 1937 if (col >= k) { /* only take upper triangular entry */ 1938 rtmp[col] = aa[j]; 1939 *bval++ = 0.0; /* for in-place factorization */ 1940 } 1941 } 1942 /* shift the diagonal of the matrix */ 1943 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1944 1945 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1946 dk = rtmp[k]; 1947 i = jl[k]; /* first row to be added to k_th row */ 1948 1949 while (i < k) { 1950 nexti = jl[i]; /* next row to be added to k_th row */ 1951 1952 /* compute multiplier, update diag(k) and U(i,k) */ 1953 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1954 uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */ 1955 dk += uikdi * ba[ili]; 1956 ba[ili] = uikdi; /* -U(i,k) */ 1957 1958 /* add multiple of row i to k-th row */ 1959 jmin = ili + 1; 1960 jmax = bi[i + 1]; 1961 if (jmin < jmax) { 1962 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 1963 /* update il and jl for row i */ 1964 il[i] = jmin; 1965 j = bj[jmin]; 1966 jl[i] = jl[j]; 1967 jl[j] = i; 1968 } 1969 i = nexti; 1970 } 1971 1972 /* shift the diagonals when zero pivot is detected */ 1973 /* compute rs=sum of abs(off-diagonal) */ 1974 rs = 0.0; 1975 jmin = bi[k] + 1; 1976 nz = bi[k + 1] - jmin; 1977 bcol = bj + jmin; 1978 for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]); 1979 1980 sctx.rs = rs; 1981 sctx.pv = dk; 1982 PetscCall(MatPivotCheck(B, A, info, &sctx, k)); 1983 if (sctx.newshift) break; 1984 dk = sctx.pv; 1985 1986 /* copy data into U(k,:) */ 1987 ba[bi[k]] = 1.0 / dk; /* U(k,k) */ 1988 jmin = bi[k] + 1; 1989 jmax = bi[k + 1]; 1990 if (jmin < jmax) { 1991 for (j = jmin; j < jmax; j++) { 1992 col = bj[j]; 1993 ba[j] = rtmp[col]; 1994 } 1995 /* add the k-th row into il and jl */ 1996 il[k] = jmin; 1997 i = bj[jmin]; 1998 jl[k] = jl[i]; 1999 jl[i] = k; 2000 } 2001 } 2002 } while (sctx.newshift); 2003 2004 PetscCall(PetscFree3(rtmp, il, jl)); 2005 PetscCall(ISRestoreIndices(ip, &rip)); 2006 PetscCall(ISRestoreIndices(iip, &riip)); 2007 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 2008 2009 PetscCall(ISIdentity(ip, &perm_identity)); 2010 if (perm_identity) { 2011 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2012 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2013 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2014 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2015 } else { 2016 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2017 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2018 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2019 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2020 } 2021 2022 C->assembled = PETSC_TRUE; 2023 C->preallocated = PETSC_TRUE; 2024 2025 PetscCall(PetscLogFlops(C->rmap->n)); 2026 if (sctx.nshift) { 2027 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2028 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2029 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2030 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2031 } 2032 } 2033 PetscFunctionReturn(PETSC_SUCCESS); 2034 } 2035 2036 /* 2037 icc() under revised new data structure. 2038 Factored arrays bj and ba are stored as 2039 U(0,:),...,U(i,:),U(n-1,:) 2040 2041 ui=fact->i is an array of size n+1, in which 2042 ui+ 2043 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2044 ui[n]: points to U(n-1,n-1)+1 2045 2046 udiag=fact->diag is an array of size n,in which 2047 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2048 2049 U(i,:) contains udiag[i] as its last entry, i.e., 2050 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2051 */ 2052 2053 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2054 { 2055 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2056 Mat_SeqSBAIJ *b; 2057 PetscBool perm_identity; 2058 PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 2059 const PetscInt *rip, *riip, *adiag; 2060 PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 2061 PetscInt nlnk, *lnk, *lnk_lvl = NULL; 2062 PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 2063 PetscReal fill = info->fill, levels = info->levels; 2064 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2065 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 2066 PetscBT lnkbt; 2067 IS iperm; 2068 PetscBool diagDense; 2069 2070 PetscFunctionBegin; 2071 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2072 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense)); 2073 PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries"); 2074 PetscCall(ISIdentity(perm, &perm_identity)); 2075 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2076 2077 PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui)); 2078 PetscCall(PetscMalloc1(am + 1, &udiag)); 2079 ui[0] = 0; 2080 2081 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2082 if (!levels && perm_identity) { 2083 for (i = 0; i < am; i++) { 2084 ncols = ai[i + 1] - adiag[i]; 2085 ui[i + 1] = ui[i] + ncols; 2086 udiag[i] = ui[i + 1] - 1; /* points to the last entry of U(i,:) */ 2087 } 2088 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2089 cols = uj; 2090 for (i = 0; i < am; i++) { 2091 aj = a->j + adiag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2092 ncols = ai[i + 1] - adiag[i] - 1; 2093 for (j = 0; j < ncols; j++) *cols++ = aj[j]; 2094 *cols++ = i; /* diagonal is located as the last entry of U(i,:) */ 2095 } 2096 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2097 PetscCall(ISGetIndices(iperm, &riip)); 2098 PetscCall(ISGetIndices(perm, &rip)); 2099 2100 /* initialization */ 2101 PetscCall(PetscMalloc1(am + 1, &ajtmp)); 2102 2103 /* jl: linked list for storing indices of the pivot rows 2104 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2105 PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 2106 for (i = 0; i < am; i++) { 2107 jl[i] = am; 2108 il[i] = 0; 2109 } 2110 2111 /* create and initialize a linked list for storing column indices of the active row k */ 2112 nlnk = am + 1; 2113 PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 2114 2115 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2116 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 2117 current_space = free_space; 2118 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl)); 2119 current_space_lvl = free_space_lvl; 2120 2121 for (k = 0; k < am; k++) { /* for each active row k */ 2122 /* initialize lnk by the column indices of row rip[k] of A */ 2123 nzk = 0; 2124 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2125 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2126 ncols_upper = 0; 2127 for (j = 0; j < ncols; j++) { 2128 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2129 if (riip[i] >= k) { /* only take upper triangular entry */ 2130 ajtmp[ncols_upper] = i; 2131 ncols_upper++; 2132 } 2133 } 2134 PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 2135 nzk += nlnk; 2136 2137 /* update lnk by computing fill-in for each pivot row to be merged in */ 2138 prow = jl[k]; /* 1st pivot row */ 2139 2140 while (prow < k) { 2141 nextprow = jl[prow]; 2142 2143 /* merge prow into k-th row */ 2144 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2145 jmax = ui[prow + 1]; 2146 ncols = jmax - jmin; 2147 i = jmin - ui[prow]; 2148 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2149 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2150 j = *(uj - 1); 2151 PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 2152 nzk += nlnk; 2153 2154 /* update il and jl for prow */ 2155 if (jmin < jmax) { 2156 il[prow] = jmin; 2157 j = *cols; 2158 jl[prow] = jl[j]; 2159 jl[j] = prow; 2160 } 2161 prow = nextprow; 2162 } 2163 2164 /* if free space is not available, make more free space */ 2165 if (current_space->local_remaining < nzk) { 2166 i = am - k + 1; /* num of unfactored rows */ 2167 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2168 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2169 PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 2170 reallocs++; 2171 } 2172 2173 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2174 PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 2175 PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 2176 2177 /* add the k-th row into il and jl */ 2178 if (nzk > 1) { 2179 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2180 jl[k] = jl[i]; 2181 jl[i] = k; 2182 il[k] = ui[k] + 1; 2183 } 2184 uj_ptr[k] = current_space->array; 2185 uj_lvl_ptr[k] = current_space_lvl->array; 2186 2187 current_space->array += nzk; 2188 current_space->local_used += nzk; 2189 current_space->local_remaining -= nzk; 2190 2191 current_space_lvl->array += nzk; 2192 current_space_lvl->local_used += nzk; 2193 current_space_lvl->local_remaining -= nzk; 2194 2195 ui[k + 1] = ui[k] + nzk; 2196 } 2197 2198 PetscCall(ISRestoreIndices(perm, &rip)); 2199 PetscCall(ISRestoreIndices(iperm, &riip)); 2200 PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 2201 PetscCall(PetscFree(ajtmp)); 2202 2203 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2204 PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj)); 2205 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 2206 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 2207 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2208 2209 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2210 2211 /* put together the new matrix in MATSEQSBAIJ format */ 2212 b = (Mat_SeqSBAIJ *)fact->data; 2213 b->free_ij = PETSC_TRUE; 2214 PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a)); 2215 b->free_a = PETSC_TRUE; 2216 b->j = uj; 2217 b->i = ui; 2218 b->diag = udiag; 2219 b->ilen = NULL; 2220 b->imax = NULL; 2221 b->row = perm; 2222 b->col = perm; 2223 PetscCall(PetscObjectReference((PetscObject)perm)); 2224 PetscCall(PetscObjectReference((PetscObject)perm)); 2225 b->icol = iperm; 2226 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2227 2228 PetscCall(PetscMalloc1(am, &b->solve_work)); 2229 2230 b->maxnz = b->nz = ui[am]; 2231 2232 fact->info.factor_mallocs = reallocs; 2233 fact->info.fill_ratio_given = fill; 2234 if (ai[am] != 0) { 2235 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2236 fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 2237 } else { 2238 fact->info.fill_ratio_needed = 0.0; 2239 } 2240 #if defined(PETSC_USE_INFO) 2241 if (ai[am] != 0) { 2242 PetscReal af = fact->info.fill_ratio_needed; 2243 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2244 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2245 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2246 } else { 2247 PetscCall(PetscInfo(A, "Empty matrix\n")); 2248 } 2249 #endif 2250 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2251 PetscFunctionReturn(PETSC_SUCCESS); 2252 } 2253 2254 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2255 { 2256 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2257 Mat_SeqSBAIJ *b; 2258 PetscBool perm_identity; 2259 PetscReal fill = info->fill; 2260 const PetscInt *rip, *riip; 2261 PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 2262 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 2263 PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag; 2264 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2265 PetscBT lnkbt; 2266 IS iperm; 2267 PetscBool diagDense; 2268 2269 PetscFunctionBegin; 2270 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2271 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense)); 2272 PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries"); 2273 2274 /* check whether perm is the identity mapping */ 2275 PetscCall(ISIdentity(perm, &perm_identity)); 2276 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2277 PetscCall(ISGetIndices(iperm, &riip)); 2278 PetscCall(ISGetIndices(perm, &rip)); 2279 2280 /* initialization */ 2281 PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui)); 2282 PetscCall(PetscMalloc1(am + 1, &udiag)); 2283 ui[0] = 0; 2284 2285 /* jl: linked list for storing indices of the pivot rows 2286 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2287 PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 2288 for (i = 0; i < am; i++) { 2289 jl[i] = am; 2290 il[i] = 0; 2291 } 2292 2293 /* create and initialize a linked list for storing column indices of the active row k */ 2294 nlnk = am + 1; 2295 PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 2296 2297 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2298 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 2299 current_space = free_space; 2300 2301 for (k = 0; k < am; k++) { /* for each active row k */ 2302 /* initialize lnk by the column indices of row rip[k] of A */ 2303 nzk = 0; 2304 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2305 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2306 ncols_upper = 0; 2307 for (j = 0; j < ncols; j++) { 2308 i = riip[*(aj + ai[rip[k]] + j)]; 2309 if (i >= k) { /* only take upper triangular entry */ 2310 cols[ncols_upper] = i; 2311 ncols_upper++; 2312 } 2313 } 2314 PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 2315 nzk += nlnk; 2316 2317 /* update lnk by computing fill-in for each pivot row to be merged in */ 2318 prow = jl[k]; /* 1st pivot row */ 2319 2320 while (prow < k) { 2321 nextprow = jl[prow]; 2322 /* merge prow into k-th row */ 2323 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2324 jmax = ui[prow + 1]; 2325 ncols = jmax - jmin; 2326 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2327 PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 2328 nzk += nlnk; 2329 2330 /* update il and jl for prow */ 2331 if (jmin < jmax) { 2332 il[prow] = jmin; 2333 j = *uj_ptr; 2334 jl[prow] = jl[j]; 2335 jl[j] = prow; 2336 } 2337 prow = nextprow; 2338 } 2339 2340 /* if free space is not available, make more free space */ 2341 if (current_space->local_remaining < nzk) { 2342 i = am - k + 1; /* num of unfactored rows */ 2343 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2344 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2345 reallocs++; 2346 } 2347 2348 /* copy data into free space, then initialize lnk */ 2349 PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 2350 2351 /* add the k-th row into il and jl */ 2352 if (nzk > 1) { 2353 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2354 jl[k] = jl[i]; 2355 jl[i] = k; 2356 il[k] = ui[k] + 1; 2357 } 2358 ui_ptr[k] = current_space->array; 2359 2360 current_space->array += nzk; 2361 current_space->local_used += nzk; 2362 current_space->local_remaining -= nzk; 2363 2364 ui[k + 1] = ui[k] + nzk; 2365 } 2366 2367 PetscCall(ISRestoreIndices(perm, &rip)); 2368 PetscCall(ISRestoreIndices(iperm, &riip)); 2369 PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 2370 2371 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2372 PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscInt), (void **)&uj)); 2373 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 2374 PetscCall(PetscLLDestroy(lnk, lnkbt)); 2375 2376 /* put together the new matrix in MATSEQSBAIJ format */ 2377 b = (Mat_SeqSBAIJ *)fact->data; 2378 b->free_ij = PETSC_TRUE; 2379 PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a)); 2380 b->free_a = PETSC_TRUE; 2381 b->j = uj; 2382 b->i = ui; 2383 b->diag = udiag; 2384 b->ilen = NULL; 2385 b->imax = NULL; 2386 b->row = perm; 2387 b->col = perm; 2388 2389 PetscCall(PetscObjectReference((PetscObject)perm)); 2390 PetscCall(PetscObjectReference((PetscObject)perm)); 2391 2392 b->icol = iperm; 2393 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2394 2395 PetscCall(PetscMalloc1(am, &b->solve_work)); 2396 2397 b->maxnz = b->nz = ui[am]; 2398 2399 fact->info.factor_mallocs = reallocs; 2400 fact->info.fill_ratio_given = fill; 2401 if (ai[am] != 0) { 2402 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2403 fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 2404 } else { 2405 fact->info.fill_ratio_needed = 0.0; 2406 } 2407 #if defined(PETSC_USE_INFO) 2408 if (ai[am] != 0) { 2409 PetscReal af = fact->info.fill_ratio_needed; 2410 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2411 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2412 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2413 } else { 2414 PetscCall(PetscInfo(A, "Empty matrix\n")); 2415 } 2416 #endif 2417 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2418 PetscFunctionReturn(PETSC_SUCCESS); 2419 } 2420 2421 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx) 2422 { 2423 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2424 PetscInt n = A->rmap->n; 2425 const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi; 2426 PetscScalar *x, sum; 2427 const PetscScalar *b; 2428 const MatScalar *aa, *v; 2429 PetscInt i, nz; 2430 2431 PetscFunctionBegin; 2432 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 2433 2434 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 2435 PetscCall(VecGetArrayRead(bb, &b)); 2436 PetscCall(VecGetArrayWrite(xx, &x)); 2437 2438 /* forward solve the lower triangular */ 2439 x[0] = b[0]; 2440 v = aa; 2441 vi = aj; 2442 for (i = 1; i < n; i++) { 2443 nz = ai[i + 1] - ai[i]; 2444 sum = b[i]; 2445 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 2446 v += nz; 2447 vi += nz; 2448 x[i] = sum; 2449 } 2450 2451 /* backward solve the upper triangular */ 2452 for (i = n - 1; i >= 0; i--) { 2453 v = aa + adiag[i + 1] + 1; 2454 vi = aj + adiag[i + 1] + 1; 2455 nz = adiag[i] - adiag[i + 1] - 1; 2456 sum = x[i]; 2457 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 2458 x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 2459 } 2460 2461 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 2462 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 2463 PetscCall(VecRestoreArrayRead(bb, &b)); 2464 PetscCall(VecRestoreArrayWrite(xx, &x)); 2465 PetscFunctionReturn(PETSC_SUCCESS); 2466 } 2467 2468 PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx) 2469 { 2470 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2471 IS iscol = a->col, isrow = a->row; 2472 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz; 2473 const PetscInt *rout, *cout, *r, *c; 2474 PetscScalar *x, *tmp, sum; 2475 const PetscScalar *b; 2476 const MatScalar *aa, *v; 2477 2478 PetscFunctionBegin; 2479 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 2480 2481 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 2482 PetscCall(VecGetArrayRead(bb, &b)); 2483 PetscCall(VecGetArrayWrite(xx, &x)); 2484 tmp = a->solve_work; 2485 2486 PetscCall(ISGetIndices(isrow, &rout)); 2487 r = rout; 2488 PetscCall(ISGetIndices(iscol, &cout)); 2489 c = cout; 2490 2491 /* forward solve the lower triangular */ 2492 tmp[0] = b[r[0]]; 2493 v = aa; 2494 vi = aj; 2495 for (i = 1; i < n; i++) { 2496 nz = ai[i + 1] - ai[i]; 2497 sum = b[r[i]]; 2498 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 2499 tmp[i] = sum; 2500 v += nz; 2501 vi += nz; 2502 } 2503 2504 /* backward solve the upper triangular */ 2505 for (i = n - 1; i >= 0; i--) { 2506 v = aa + adiag[i + 1] + 1; 2507 vi = aj + adiag[i + 1] + 1; 2508 nz = adiag[i] - adiag[i + 1] - 1; 2509 sum = tmp[i]; 2510 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 2511 x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 2512 } 2513 2514 PetscCall(ISRestoreIndices(isrow, &rout)); 2515 PetscCall(ISRestoreIndices(iscol, &cout)); 2516 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 2517 PetscCall(VecRestoreArrayRead(bb, &b)); 2518 PetscCall(VecRestoreArrayWrite(xx, &x)); 2519 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 2520 PetscFunctionReturn(PETSC_SUCCESS); 2521 } 2522