1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/mat/impls/sbaij/seq/sbaij.h> 3 #include <petscbt.h> 4 #include <../src/mat/utils/freespace.h> 5 6 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type) 7 { 8 PetscFunctionBegin; 9 *type = MATSOLVERPETSC; 10 PetscFunctionReturn(PETSC_SUCCESS); 11 } 12 13 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B) 14 { 15 PetscInt n = A->rmap->n; 16 17 PetscFunctionBegin; 18 if (PetscDefined(USE_COMPLEX) && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 19 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n")); 20 *B = NULL; 21 PetscFunctionReturn(PETSC_SUCCESS); 22 } 23 24 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 25 PetscCall(MatSetSizes(*B, n, n, n, n)); 26 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 27 PetscCall(MatSetType(*B, MATSEQAIJ)); 28 29 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 30 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 31 32 PetscCall(MatSetBlockSizesFromMats(*B, A, A)); 33 PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU])); 34 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU])); 35 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT])); 36 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 37 PetscCall(MatSetType(*B, MATSEQSBAIJ)); 38 PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL)); 39 40 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 41 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 42 PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY])); 43 PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC])); 44 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported"); 45 (*B)->factortype = ftype; 46 47 PetscCall(PetscFree((*B)->solvertype)); 48 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype)); 49 (*B)->canuseordering = PETSC_TRUE; 50 PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc)); 51 PetscFunctionReturn(PETSC_SUCCESS); 52 } 53 54 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 55 { 56 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 57 IS isicol; 58 const PetscInt *r, *ic, *ai = a->i, *aj = a->j, *ajtmp; 59 PetscInt i, n = A->rmap->n; 60 PetscInt *bi, *bj; 61 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 62 PetscReal f; 63 PetscInt nlnk, *lnk, k, **bi_ptr; 64 PetscFreeSpaceList free_space = NULL, current_space = NULL; 65 PetscBT lnkbt; 66 PetscBool diagDense; 67 68 PetscFunctionBegin; 69 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 70 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense)); 71 PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries"); 72 73 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 74 PetscCall(ISGetIndices(isrow, &r)); 75 PetscCall(ISGetIndices(isicol, &ic)); 76 77 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 78 PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi)); 79 PetscCall(PetscMalloc1(n + 1, &bdiag)); 80 bi[0] = bdiag[0] = 0; 81 82 /* linked list for storing column indices of the active row */ 83 nlnk = n + 1; 84 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 85 86 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 87 88 /* initial FreeSpace size is f*(ai[n]+1) */ 89 f = info->fill; 90 if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 91 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 92 current_space = free_space; 93 94 for (i = 0; i < n; i++) { 95 /* copy previous fill into linked list */ 96 nzi = 0; 97 nnz = ai[r[i] + 1] - ai[r[i]]; 98 ajtmp = aj + ai[r[i]]; 99 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 100 nzi += nlnk; 101 102 /* add pivot rows into linked list */ 103 row = lnk[n]; 104 while (row < i) { 105 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 106 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 107 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 108 nzi += nlnk; 109 row = lnk[row]; 110 } 111 bi[i + 1] = bi[i] + nzi; 112 im[i] = nzi; 113 114 /* mark bdiag */ 115 nzbd = 0; 116 nnz = nzi; 117 k = lnk[n]; 118 while (nnz-- && k < i) { 119 nzbd++; 120 k = lnk[k]; 121 } 122 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 123 124 /* if free space is not available, make more free space */ 125 if (current_space->local_remaining < nzi) { 126 /* estimated additional space needed */ 127 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi)); 128 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 129 reallocs++; 130 } 131 132 /* copy data into free space, then initialize lnk */ 133 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 134 135 bi_ptr[i] = current_space->array; 136 current_space->array += nzi; 137 current_space->local_used += nzi; 138 current_space->local_remaining -= nzi; 139 } 140 141 PetscCall(ISRestoreIndices(isrow, &r)); 142 PetscCall(ISRestoreIndices(isicol, &ic)); 143 144 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 145 PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj)); 146 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 147 PetscCall(PetscLLDestroy(lnk, lnkbt)); 148 PetscCall(PetscFree2(bi_ptr, im)); 149 150 /* put together the new matrix */ 151 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL)); 152 b = (Mat_SeqAIJ *)B->data; 153 b->free_ij = PETSC_TRUE; 154 PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a)); 155 b->free_a = PETSC_TRUE; 156 b->j = bj; 157 b->i = bi; 158 b->diag = bdiag; 159 b->ilen = NULL; 160 b->imax = NULL; 161 b->row = isrow; 162 b->col = iscol; 163 PetscCall(PetscObjectReference((PetscObject)isrow)); 164 PetscCall(PetscObjectReference((PetscObject)iscol)); 165 b->icol = isicol; 166 PetscCall(PetscMalloc1(n, &b->solve_work)); 167 168 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 169 b->maxnz = b->nz = bdiag[0] + 1; 170 171 B->factortype = MAT_FACTOR_LU; 172 B->info.factor_mallocs = reallocs; 173 B->info.fill_ratio_given = f; 174 175 if (ai[n]) { 176 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 177 } else { 178 B->info.fill_ratio_needed = 0.0; 179 } 180 #if defined(PETSC_USE_INFO) 181 if (ai[n] != 0) { 182 PetscReal af = B->info.fill_ratio_needed; 183 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 184 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 185 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 186 PetscCall(PetscInfo(A, "for best performance.\n")); 187 } else { 188 PetscCall(PetscInfo(A, "Empty matrix\n")); 189 } 190 #endif 191 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 192 if (a->inode.size_csr) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 193 PetscCall(MatSeqAIJCheckInode_FactorLU(B)); 194 PetscFunctionReturn(PETSC_SUCCESS); 195 } 196 197 /* 198 Trouble in factorization, should we dump the original matrix? 199 */ 200 PetscErrorCode MatFactorDumpMatrix(Mat A) 201 { 202 PetscBool flg = PETSC_FALSE; 203 204 PetscFunctionBegin; 205 PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL)); 206 if (flg) { 207 PetscViewer viewer; 208 char filename[PETSC_MAX_PATH_LEN]; 209 210 PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank)); 211 PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer)); 212 PetscCall(MatView(A, viewer)); 213 PetscCall(PetscViewerDestroy(&viewer)); 214 } 215 PetscFunctionReturn(PETSC_SUCCESS); 216 } 217 218 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) 219 { 220 Mat C = B; 221 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 222 IS isrow = b->row, isicol = b->icol; 223 const PetscInt *r, *ic, *ics; 224 const PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag; 225 PetscInt i, j, k, nz, nzL, row, *pj; 226 const PetscInt *ajtmp, *bjtmp; 227 MatScalar *rtmp, *pc, multiplier, *pv; 228 const MatScalar *aa, *v; 229 MatScalar *ba; 230 PetscBool row_identity, col_identity; 231 FactorShiftCtx sctx; 232 const PetscInt *ddiag; 233 PetscReal rs; 234 MatScalar d; 235 236 PetscFunctionBegin; 237 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 238 PetscCall(MatSeqAIJGetArrayWrite(B, &ba)); 239 /* MatPivotSetUp(): initialize shift context sctx */ 240 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 241 242 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 243 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL)); 244 sctx.shift_top = info->zeropivot; 245 for (i = 0; i < n; i++) { 246 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 247 d = aa[ddiag[i]]; 248 rs = -PetscAbsScalar(d) - PetscRealPart(d); 249 v = aa + ai[i]; 250 nz = ai[i + 1] - ai[i]; 251 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 252 if (rs > sctx.shift_top) sctx.shift_top = rs; 253 } 254 sctx.shift_top *= 1.1; 255 sctx.nshift_max = 5; 256 sctx.shift_lo = 0.; 257 sctx.shift_hi = 1.; 258 } 259 260 PetscCall(ISGetIndices(isrow, &r)); 261 PetscCall(ISGetIndices(isicol, &ic)); 262 PetscCall(PetscMalloc1(n + 1, &rtmp)); 263 ics = ic; 264 265 do { 266 sctx.newshift = PETSC_FALSE; 267 for (i = 0; i < n; i++) { 268 /* zero rtmp */ 269 /* L part */ 270 nz = bi[i + 1] - bi[i]; 271 bjtmp = bj + bi[i]; 272 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 273 274 /* U part */ 275 nz = bdiag[i] - bdiag[i + 1]; 276 bjtmp = bj + bdiag[i + 1] + 1; 277 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 278 279 /* load in initial (unfactored row) */ 280 nz = ai[r[i] + 1] - ai[r[i]]; 281 ajtmp = aj + ai[r[i]]; 282 v = aa + ai[r[i]]; 283 for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 284 /* ZeropivotApply() */ 285 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 286 287 /* elimination */ 288 bjtmp = bj + bi[i]; 289 row = *bjtmp++; 290 nzL = bi[i + 1] - bi[i]; 291 for (k = 0; k < nzL; k++) { 292 pc = rtmp + row; 293 if (*pc != 0.0) { 294 pv = ba + bdiag[row]; 295 multiplier = *pc * (*pv); 296 *pc = multiplier; 297 298 pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */ 299 pv = ba + bdiag[row + 1] + 1; 300 nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */ 301 302 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 303 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 304 } 305 row = *bjtmp++; 306 } 307 308 /* finished row so stick it into b->a */ 309 rs = 0.0; 310 /* L part */ 311 pv = ba + bi[i]; 312 pj = b->j + bi[i]; 313 nz = bi[i + 1] - bi[i]; 314 for (j = 0; j < nz; j++) { 315 pv[j] = rtmp[pj[j]]; 316 rs += PetscAbsScalar(pv[j]); 317 } 318 319 /* U part */ 320 pv = ba + bdiag[i + 1] + 1; 321 pj = b->j + bdiag[i + 1] + 1; 322 nz = bdiag[i] - bdiag[i + 1] - 1; 323 for (j = 0; j < nz; j++) { 324 pv[j] = rtmp[pj[j]]; 325 rs += PetscAbsScalar(pv[j]); 326 } 327 328 sctx.rs = rs; 329 sctx.pv = rtmp[i]; 330 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 331 if (sctx.newshift) break; /* break for-loop */ 332 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 333 334 /* Mark diagonal and invert diagonal for simpler triangular solves */ 335 pv = ba + bdiag[i]; 336 *pv = 1.0 / rtmp[i]; 337 338 } /* endof for (i=0; i<n; i++) { */ 339 340 /* MatPivotRefine() */ 341 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 342 /* 343 * if no shift in this attempt & shifting & started shifting & can refine, 344 * then try lower shift 345 */ 346 sctx.shift_hi = sctx.shift_fraction; 347 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 348 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 349 sctx.newshift = PETSC_TRUE; 350 sctx.nshift++; 351 } 352 } while (sctx.newshift); 353 354 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 355 PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba)); 356 357 PetscCall(PetscFree(rtmp)); 358 PetscCall(ISRestoreIndices(isicol, &ic)); 359 PetscCall(ISRestoreIndices(isrow, &r)); 360 361 PetscCall(ISIdentity(isrow, &row_identity)); 362 PetscCall(ISIdentity(isicol, &col_identity)); 363 if (b->inode.size_csr) { 364 C->ops->solve = MatSolve_SeqAIJ_Inode; 365 } else if (row_identity && col_identity) { 366 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 367 } else { 368 C->ops->solve = MatSolve_SeqAIJ; 369 } 370 C->ops->solveadd = MatSolveAdd_SeqAIJ; 371 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 372 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 373 C->ops->matsolve = MatMatSolve_SeqAIJ; 374 C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ; 375 C->assembled = PETSC_TRUE; 376 C->preallocated = PETSC_TRUE; 377 378 PetscCall(PetscLogFlops(C->cmap->n)); 379 380 /* MatShiftView(A,info,&sctx) */ 381 if (sctx.nshift) { 382 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 383 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 384 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 385 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 386 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 387 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 388 } 389 } 390 PetscFunctionReturn(PETSC_SUCCESS); 391 } 392 393 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat); 394 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec); 395 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec); 396 397 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) 398 { 399 Mat C = B; 400 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data; 401 IS isrow = b->row, isicol = b->icol; 402 const PetscInt *r, *ic, *ics; 403 PetscInt nz, row, i, j, n = A->rmap->n, diag; 404 const PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 405 const PetscInt *ajtmp, *bjtmp, *ddiag, *pj; 406 MatScalar *pv, *rtmp, *pc, multiplier, d; 407 const MatScalar *v, *aa; 408 MatScalar *ba; 409 PetscReal rs = 0.0; 410 FactorShiftCtx sctx; 411 PetscBool row_identity, col_identity; 412 413 PetscFunctionBegin; 414 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL)); 415 416 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 417 PetscCall(MatSeqAIJGetArrayWrite(B, &ba)); 418 /* MatPivotSetUp(): initialize shift context sctx */ 419 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 420 421 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 422 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL)); 423 sctx.shift_top = info->zeropivot; 424 for (i = 0; i < n; i++) { 425 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 426 d = aa[ddiag[i]]; 427 rs = -PetscAbsScalar(d) - PetscRealPart(d); 428 v = aa + ai[i]; 429 nz = ai[i + 1] - ai[i]; 430 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 431 if (rs > sctx.shift_top) sctx.shift_top = rs; 432 } 433 sctx.shift_top *= 1.1; 434 sctx.nshift_max = 5; 435 sctx.shift_lo = 0.; 436 sctx.shift_hi = 1.; 437 } 438 439 PetscCall(ISGetIndices(isrow, &r)); 440 PetscCall(ISGetIndices(isicol, &ic)); 441 PetscCall(PetscMalloc1(n + 1, &rtmp)); 442 ics = ic; 443 444 do { 445 sctx.newshift = PETSC_FALSE; 446 for (i = 0; i < n; i++) { 447 nz = bi[i + 1] - bi[i]; 448 bjtmp = bj + bi[i]; 449 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 450 451 /* load in initial (unfactored row) */ 452 nz = ai[r[i] + 1] - ai[r[i]]; 453 ajtmp = aj + ai[r[i]]; 454 v = aa + ai[r[i]]; 455 for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 456 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 457 458 row = *bjtmp++; 459 while (row < i) { 460 pc = rtmp + row; 461 if (*pc != 0.0) { 462 pv = ba + ddiag[row]; 463 pj = b->j + ddiag[row] + 1; 464 multiplier = *pc / *pv++; 465 *pc = multiplier; 466 nz = bi[row + 1] - ddiag[row] - 1; 467 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 468 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 469 } 470 row = *bjtmp++; 471 } 472 /* finished row so stick it into b->a */ 473 pv = ba + bi[i]; 474 pj = b->j + bi[i]; 475 nz = bi[i + 1] - bi[i]; 476 diag = ddiag[i] - bi[i]; 477 rs = 0.0; 478 for (j = 0; j < nz; j++) { 479 pv[j] = rtmp[pj[j]]; 480 rs += PetscAbsScalar(pv[j]); 481 } 482 rs -= PetscAbsScalar(pv[diag]); 483 484 sctx.rs = rs; 485 sctx.pv = pv[diag]; 486 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 487 if (sctx.newshift) break; 488 pv[diag] = sctx.pv; 489 } 490 491 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 492 /* 493 * if no shift in this attempt & shifting & started shifting & can refine, 494 * then try lower shift 495 */ 496 sctx.shift_hi = sctx.shift_fraction; 497 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 498 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 499 sctx.newshift = PETSC_TRUE; 500 sctx.nshift++; 501 } 502 } while (sctx.newshift); 503 504 /* invert diagonal entries for simpler triangular solves */ 505 for (i = 0; i < n; i++) ba[ddiag[i]] = 1.0 / ba[ddiag[i]]; 506 PetscCall(PetscFree(rtmp)); 507 PetscCall(ISRestoreIndices(isicol, &ic)); 508 PetscCall(ISRestoreIndices(isrow, &r)); 509 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 510 PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba)); 511 512 PetscCall(ISIdentity(isrow, &row_identity)); 513 PetscCall(ISIdentity(isicol, &col_identity)); 514 if (row_identity && col_identity) { 515 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 516 } else { 517 C->ops->solve = MatSolve_SeqAIJ_inplace; 518 } 519 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 520 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 521 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 522 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 523 C->ops->matsolvetranspose = NULL; 524 525 C->assembled = PETSC_TRUE; 526 C->preallocated = PETSC_TRUE; 527 528 PetscCall(PetscLogFlops(C->cmap->n)); 529 if (sctx.nshift) { 530 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 531 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 532 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 533 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 534 } 535 } 536 C->ops->solve = MatSolve_SeqAIJ_inplace; 537 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 538 539 PetscCall(MatSeqAIJCheckInode(C)); 540 PetscFunctionReturn(PETSC_SUCCESS); 541 } 542 543 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec); 544 545 /* 546 This routine implements inplace ILU(0) with row or/and column permutations. 547 Input: 548 A - original matrix 549 Output; 550 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 551 a->j (col index) is permuted by the inverse of colperm, then sorted 552 a->a reordered accordingly with a->j 553 a->diag (ptr to diagonal elements) is updated. 554 */ 555 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info) 556 { 557 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 558 IS isrow = a->row, isicol = a->icol; 559 const PetscInt *r, *ic, *ics; 560 PetscInt i, j, n = A->rmap->n, *ai = a->i, *aj = a->j; 561 PetscInt *ajtmp, nz, row; 562 PetscInt nbdiag, *pj; 563 PetscScalar *rtmp, *pc, multiplier, d; 564 MatScalar *pv, *v; 565 PetscReal rs; 566 FactorShiftCtx sctx; 567 MatScalar *aa, *vtmp; 568 PetscInt *diag; 569 570 PetscFunctionBegin; 571 PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address"); 572 573 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, (const PetscInt **)&diag, NULL)); 574 PetscCall(MatSeqAIJGetArray(A, &aa)); 575 /* MatPivotSetUp(): initialize shift context sctx */ 576 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 577 578 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 579 const PetscInt *ddiag; 580 581 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL)); 582 sctx.shift_top = info->zeropivot; 583 for (i = 0; i < n; i++) { 584 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 585 d = aa[ddiag[i]]; 586 rs = -PetscAbsScalar(d) - PetscRealPart(d); 587 vtmp = aa + ai[i]; 588 nz = ai[i + 1] - ai[i]; 589 for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]); 590 if (rs > sctx.shift_top) sctx.shift_top = rs; 591 } 592 sctx.shift_top *= 1.1; 593 sctx.nshift_max = 5; 594 sctx.shift_lo = 0.; 595 sctx.shift_hi = 1.; 596 } 597 598 PetscCall(ISGetIndices(isrow, &r)); 599 PetscCall(ISGetIndices(isicol, &ic)); 600 PetscCall(PetscMalloc1(n + 1, &rtmp)); 601 PetscCall(PetscArrayzero(rtmp, n + 1)); 602 ics = ic; 603 604 #if defined(MV) 605 sctx.shift_top = 0.; 606 sctx.nshift_max = 0; 607 sctx.shift_lo = 0.; 608 sctx.shift_hi = 0.; 609 sctx.shift_fraction = 0.; 610 611 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 612 sctx.shift_top = 0.; 613 for (i = 0; i < n; i++) { 614 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 615 d = aa[diag[i]]; 616 rs = -PetscAbsScalar(d) - PetscRealPart(d); 617 v = aa + ai[i]; 618 nz = ai[i + 1] - ai[i]; 619 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 620 if (rs > sctx.shift_top) sctx.shift_top = rs; 621 } 622 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 623 sctx.shift_top *= 1.1; 624 sctx.nshift_max = 5; 625 sctx.shift_lo = 0.; 626 sctx.shift_hi = 1.; 627 } 628 629 sctx.shift_amount = 0.; 630 sctx.nshift = 0; 631 #endif 632 633 do { 634 sctx.newshift = PETSC_FALSE; 635 for (i = 0; i < n; i++) { 636 /* load in initial unfactored row */ 637 nz = ai[r[i] + 1] - ai[r[i]]; 638 ajtmp = aj + ai[r[i]]; 639 v = aa + ai[r[i]]; 640 /* sort permuted ajtmp and values v accordingly */ 641 for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]]; 642 PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v)); 643 644 diag[r[i]] = ai[r[i]]; 645 for (j = 0; j < nz; j++) { 646 rtmp[ajtmp[j]] = v[j]; 647 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 648 } 649 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 650 651 row = *ajtmp++; 652 while (row < i) { 653 pc = rtmp + row; 654 if (*pc != 0.0) { 655 pv = aa + diag[r[row]]; 656 pj = aj + diag[r[row]] + 1; 657 658 multiplier = *pc / *pv++; 659 *pc = multiplier; 660 nz = ai[r[row] + 1] - diag[r[row]] - 1; 661 for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 662 PetscCall(PetscLogFlops(1 + 2.0 * nz)); 663 } 664 row = *ajtmp++; 665 } 666 /* finished row so overwrite it onto aa */ 667 pv = aa + ai[r[i]]; 668 pj = aj + ai[r[i]]; 669 nz = ai[r[i] + 1] - ai[r[i]]; 670 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 671 672 rs = 0.0; 673 for (j = 0; j < nz; j++) { 674 pv[j] = rtmp[pj[j]]; 675 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 676 } 677 678 sctx.rs = rs; 679 sctx.pv = pv[nbdiag]; 680 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 681 if (sctx.newshift) break; 682 pv[nbdiag] = sctx.pv; 683 } 684 685 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) { 686 /* 687 * if no shift in this attempt & shifting & started shifting & can refine, 688 * then try lower shift 689 */ 690 sctx.shift_hi = sctx.shift_fraction; 691 sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.; 692 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 693 sctx.newshift = PETSC_TRUE; 694 sctx.nshift++; 695 } 696 } while (sctx.newshift); 697 698 /* invert diagonal entries for simpler triangular solves */ 699 for (i = 0; i < n; i++) aa[diag[r[i]]] = 1.0 / aa[diag[r[i]]]; 700 701 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 702 PetscCall(PetscFree(rtmp)); 703 PetscCall(ISRestoreIndices(isicol, &ic)); 704 PetscCall(ISRestoreIndices(isrow, &r)); 705 706 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 707 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 708 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 709 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 710 711 A->assembled = PETSC_TRUE; 712 A->preallocated = PETSC_TRUE; 713 714 PetscCall(PetscLogFlops(A->cmap->n)); 715 if (sctx.nshift) { 716 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 717 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 718 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 719 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 720 } 721 } 722 PetscFunctionReturn(PETSC_SUCCESS); 723 } 724 725 PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info) 726 { 727 Mat C; 728 729 PetscFunctionBegin; 730 PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C)); 731 PetscCall(MatLUFactorSymbolic(C, A, row, col, info)); 732 PetscCall(MatLUFactorNumeric(C, A, info)); 733 734 A->ops->solve = C->ops->solve; 735 A->ops->solvetranspose = C->ops->solvetranspose; 736 737 PetscCall(MatHeaderMerge(A, &C)); 738 PetscFunctionReturn(PETSC_SUCCESS); 739 } 740 741 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) 742 { 743 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 744 IS iscol = a->col, isrow = a->row; 745 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 746 PetscInt nz; 747 const PetscInt *rout, *cout, *r, *c; 748 PetscScalar *x, *tmp, *tmps, sum; 749 const PetscScalar *b; 750 const MatScalar *aa, *v; 751 const PetscInt *adiag; 752 753 PetscFunctionBegin; 754 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 755 756 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 757 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 758 PetscCall(VecGetArrayRead(bb, &b)); 759 PetscCall(VecGetArrayWrite(xx, &x)); 760 tmp = a->solve_work; 761 762 PetscCall(ISGetIndices(isrow, &rout)); 763 r = rout; 764 PetscCall(ISGetIndices(iscol, &cout)); 765 c = cout + (n - 1); 766 767 /* forward solve the lower triangular */ 768 tmp[0] = b[*r++]; 769 tmps = tmp; 770 for (i = 1; i < n; i++) { 771 v = aa + ai[i]; 772 vi = aj + ai[i]; 773 nz = adiag[i] - ai[i]; 774 sum = b[*r++]; 775 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 776 tmp[i] = sum; 777 } 778 779 /* backward solve the upper triangular */ 780 for (i = n - 1; i >= 0; i--) { 781 v = aa + adiag[i] + 1; 782 vi = aj + adiag[i] + 1; 783 nz = ai[i + 1] - adiag[i] - 1; 784 sum = tmp[i]; 785 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 786 x[*c--] = tmp[i] = sum * aa[adiag[i]]; 787 } 788 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 789 PetscCall(ISRestoreIndices(isrow, &rout)); 790 PetscCall(ISRestoreIndices(iscol, &cout)); 791 PetscCall(VecRestoreArrayRead(bb, &b)); 792 PetscCall(VecRestoreArrayWrite(xx, &x)); 793 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 794 PetscFunctionReturn(PETSC_SUCCESS); 795 } 796 797 static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X) 798 { 799 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 800 IS iscol = a->col, isrow = a->row; 801 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 802 PetscInt nz, neq, ldb, ldx; 803 const PetscInt *rout, *cout, *r, *c; 804 PetscScalar *x, *tmp = a->solve_work, *tmps, sum; 805 const PetscScalar *b, *aa, *v; 806 PetscBool isdense; 807 const PetscInt *adiag; 808 809 PetscFunctionBegin; 810 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 811 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 812 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 813 if (X != B) { 814 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 815 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 816 } 817 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 818 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 819 PetscCall(MatDenseGetArrayRead(B, &b)); 820 PetscCall(MatDenseGetLDA(B, &ldb)); 821 PetscCall(MatDenseGetArray(X, &x)); 822 PetscCall(MatDenseGetLDA(X, &ldx)); 823 PetscCall(ISGetIndices(isrow, &rout)); 824 r = rout; 825 PetscCall(ISGetIndices(iscol, &cout)); 826 c = cout; 827 for (neq = 0; neq < B->cmap->n; neq++) { 828 /* forward solve the lower triangular */ 829 tmp[0] = b[r[0]]; 830 tmps = tmp; 831 for (i = 1; i < n; i++) { 832 v = aa + ai[i]; 833 vi = aj + ai[i]; 834 nz = adiag[i] - ai[i]; 835 sum = b[r[i]]; 836 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 837 tmp[i] = sum; 838 } 839 /* backward solve the upper triangular */ 840 for (i = n - 1; i >= 0; i--) { 841 v = aa + adiag[i] + 1; 842 vi = aj + adiag[i] + 1; 843 nz = ai[i + 1] - adiag[i] - 1; 844 sum = tmp[i]; 845 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 846 x[c[i]] = tmp[i] = sum * aa[adiag[i]]; 847 } 848 b += ldb; 849 x += ldx; 850 } 851 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 852 PetscCall(ISRestoreIndices(isrow, &rout)); 853 PetscCall(ISRestoreIndices(iscol, &cout)); 854 PetscCall(MatDenseRestoreArrayRead(B, &b)); 855 PetscCall(MatDenseRestoreArray(X, &x)); 856 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 857 PetscFunctionReturn(PETSC_SUCCESS); 858 } 859 860 PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X) 861 { 862 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 863 IS iscol = a->col, isrow = a->row; 864 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 865 const PetscInt *adiag; 866 PetscInt nz, neq, ldb, ldx; 867 const PetscInt *rout, *cout, *r, *c; 868 PetscScalar *x, *tmp = a->solve_work, sum; 869 const PetscScalar *b, *aa, *v; 870 PetscBool isdense; 871 872 PetscFunctionBegin; 873 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 874 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 875 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 876 if (X != B) { 877 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 878 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 879 } 880 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 881 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 882 PetscCall(MatDenseGetArrayRead(B, &b)); 883 PetscCall(MatDenseGetLDA(B, &ldb)); 884 PetscCall(MatDenseGetArray(X, &x)); 885 PetscCall(MatDenseGetLDA(X, &ldx)); 886 PetscCall(ISGetIndices(isrow, &rout)); 887 r = rout; 888 PetscCall(ISGetIndices(iscol, &cout)); 889 c = cout; 890 for (neq = 0; neq < B->cmap->n; neq++) { 891 /* forward solve the lower triangular */ 892 tmp[0] = b[r[0]]; 893 v = aa; 894 vi = aj; 895 for (i = 1; i < n; i++) { 896 nz = ai[i + 1] - ai[i]; 897 sum = b[r[i]]; 898 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 899 tmp[i] = sum; 900 v += nz; 901 vi += nz; 902 } 903 /* backward solve the upper triangular */ 904 for (i = n - 1; i >= 0; i--) { 905 v = aa + adiag[i + 1] + 1; 906 vi = aj + adiag[i + 1] + 1; 907 nz = adiag[i] - adiag[i + 1] - 1; 908 sum = tmp[i]; 909 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 910 x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 911 } 912 b += ldb; 913 x += ldx; 914 } 915 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 916 PetscCall(ISRestoreIndices(isrow, &rout)); 917 PetscCall(ISRestoreIndices(iscol, &cout)); 918 PetscCall(MatDenseRestoreArrayRead(B, &b)); 919 PetscCall(MatDenseRestoreArray(X, &x)); 920 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 921 PetscFunctionReturn(PETSC_SUCCESS); 922 } 923 924 PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X) 925 { 926 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 927 IS iscol = a->col, isrow = a->row; 928 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, j; 929 const PetscInt *adiag = a->diag; 930 PetscInt nz, neq, ldb, ldx; 931 const PetscInt *rout, *cout, *r, *c; 932 PetscScalar *x, *tmp = a->solve_work, s1; 933 const PetscScalar *b, *aa, *v; 934 PetscBool isdense; 935 936 PetscFunctionBegin; 937 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 938 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense)); 939 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix"); 940 if (X != B) { 941 PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense)); 942 PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix"); 943 } 944 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 945 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 946 PetscCall(MatDenseGetArrayRead(B, &b)); 947 PetscCall(MatDenseGetLDA(B, &ldb)); 948 PetscCall(MatDenseGetArray(X, &x)); 949 PetscCall(MatDenseGetLDA(X, &ldx)); 950 PetscCall(ISGetIndices(isrow, &rout)); 951 r = rout; 952 PetscCall(ISGetIndices(iscol, &cout)); 953 c = cout; 954 for (neq = 0; neq < B->cmap->n; neq++) { 955 /* copy the b into temp work space according to permutation */ 956 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 957 958 /* forward solve the U^T */ 959 for (i = 0; i < n; i++) { 960 v = aa + adiag[i + 1] + 1; 961 vi = aj + adiag[i + 1] + 1; 962 nz = adiag[i] - adiag[i + 1] - 1; 963 s1 = tmp[i]; 964 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 965 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 966 tmp[i] = s1; 967 } 968 969 /* backward solve the L^T */ 970 for (i = n - 1; i >= 0; i--) { 971 v = aa + ai[i]; 972 vi = aj + ai[i]; 973 nz = ai[i + 1] - ai[i]; 974 s1 = tmp[i]; 975 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 976 } 977 978 /* copy tmp into x according to permutation */ 979 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 980 b += ldb; 981 x += ldx; 982 } 983 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 984 PetscCall(ISRestoreIndices(isrow, &rout)); 985 PetscCall(ISRestoreIndices(iscol, &cout)); 986 PetscCall(MatDenseRestoreArrayRead(B, &b)); 987 PetscCall(MatDenseRestoreArray(X, &x)); 988 PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n))); 989 PetscFunctionReturn(PETSC_SUCCESS); 990 } 991 992 static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx) 993 { 994 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 995 IS iscol = a->col, isrow = a->row; 996 const PetscInt *r, *c, *rout, *cout, *adiag; 997 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j; 998 PetscInt nz, row; 999 PetscScalar *x, *tmp, *tmps, sum; 1000 const PetscScalar *b; 1001 const MatScalar *aa, *v; 1002 1003 PetscFunctionBegin; 1004 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1005 1006 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1007 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1008 PetscCall(VecGetArrayRead(bb, &b)); 1009 PetscCall(VecGetArrayWrite(xx, &x)); 1010 tmp = a->solve_work; 1011 1012 PetscCall(ISGetIndices(isrow, &rout)); 1013 r = rout; 1014 PetscCall(ISGetIndices(iscol, &cout)); 1015 c = cout + (n - 1); 1016 1017 /* forward solve the lower triangular */ 1018 tmp[0] = b[*r++]; 1019 tmps = tmp; 1020 for (row = 1; row < n; row++) { 1021 i = rout[row]; /* permuted row */ 1022 v = aa + ai[i]; 1023 vi = aj + ai[i]; 1024 nz = adiag[i] - ai[i]; 1025 sum = b[*r++]; 1026 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1027 tmp[row] = sum; 1028 } 1029 1030 /* backward solve the upper triangular */ 1031 for (row = n - 1; row >= 0; row--) { 1032 i = rout[row]; /* permuted row */ 1033 v = aa + adiag[i] + 1; 1034 vi = aj + adiag[i] + 1; 1035 nz = ai[i + 1] - adiag[i] - 1; 1036 sum = tmp[row]; 1037 PetscSparseDenseMinusDot(sum, tmps, v, vi, nz); 1038 x[*c--] = tmp[row] = sum * aa[adiag[i]]; 1039 } 1040 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 1041 PetscCall(ISRestoreIndices(isrow, &rout)); 1042 PetscCall(ISRestoreIndices(iscol, &cout)); 1043 PetscCall(VecRestoreArrayRead(bb, &b)); 1044 PetscCall(VecRestoreArrayWrite(xx, &x)); 1045 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1046 PetscFunctionReturn(PETSC_SUCCESS); 1047 } 1048 1049 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1050 static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx) 1051 { 1052 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1053 PetscInt n = A->rmap->n; 1054 const PetscInt *ai = a->i, *aj = a->j, *adiag; 1055 PetscScalar *x; 1056 const PetscScalar *b; 1057 const MatScalar *aa; 1058 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1059 PetscInt adiag_i, i, nz, ai_i; 1060 const PetscInt *vi; 1061 const MatScalar *v; 1062 PetscScalar sum; 1063 #endif 1064 1065 PetscFunctionBegin; 1066 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 1067 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1068 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1069 PetscCall(VecGetArrayRead(bb, &b)); 1070 PetscCall(VecGetArrayWrite(xx, &x)); 1071 1072 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1073 fortransolveaij_(&n, x, ai, aj, adiag, aa, b); 1074 #else 1075 /* forward solve the lower triangular */ 1076 x[0] = b[0]; 1077 for (i = 1; i < n; i++) { 1078 ai_i = ai[i]; 1079 v = aa + ai_i; 1080 vi = aj + ai_i; 1081 nz = adiag[i] - ai_i; 1082 sum = b[i]; 1083 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1084 x[i] = sum; 1085 } 1086 1087 /* backward solve the upper triangular */ 1088 for (i = n - 1; i >= 0; i--) { 1089 adiag_i = adiag[i]; 1090 v = aa + adiag_i + 1; 1091 vi = aj + adiag_i + 1; 1092 nz = ai[i + 1] - adiag_i - 1; 1093 sum = x[i]; 1094 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 1095 x[i] = sum * aa[adiag_i]; 1096 } 1097 #endif 1098 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1099 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1100 PetscCall(VecRestoreArrayRead(bb, &b)); 1101 PetscCall(VecRestoreArrayWrite(xx, &x)); 1102 PetscFunctionReturn(PETSC_SUCCESS); 1103 } 1104 1105 static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx) 1106 { 1107 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1108 IS iscol = a->col, isrow = a->row; 1109 PetscInt i, n = A->rmap->n, j; 1110 PetscInt nz; 1111 const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag; 1112 PetscScalar *x, *tmp, sum; 1113 const PetscScalar *b; 1114 const MatScalar *aa, *v; 1115 1116 PetscFunctionBegin; 1117 if (yy != xx) PetscCall(VecCopy(yy, xx)); 1118 1119 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1120 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1121 PetscCall(VecGetArrayRead(bb, &b)); 1122 PetscCall(VecGetArray(xx, &x)); 1123 tmp = a->solve_work; 1124 1125 PetscCall(ISGetIndices(isrow, &rout)); 1126 r = rout; 1127 PetscCall(ISGetIndices(iscol, &cout)); 1128 c = cout + (n - 1); 1129 1130 /* forward solve the lower triangular */ 1131 tmp[0] = b[*r++]; 1132 for (i = 1; i < n; i++) { 1133 v = aa + ai[i]; 1134 vi = aj + ai[i]; 1135 nz = adiag[i] - ai[i]; 1136 sum = b[*r++]; 1137 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1138 tmp[i] = sum; 1139 } 1140 1141 /* backward solve the upper triangular */ 1142 for (i = n - 1; i >= 0; i--) { 1143 v = aa + adiag[i] + 1; 1144 vi = aj + adiag[i] + 1; 1145 nz = ai[i + 1] - adiag[i] - 1; 1146 sum = tmp[i]; 1147 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1148 tmp[i] = sum * aa[adiag[i]]; 1149 x[*c--] += tmp[i]; 1150 } 1151 1152 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 1153 PetscCall(ISRestoreIndices(isrow, &rout)); 1154 PetscCall(ISRestoreIndices(iscol, &cout)); 1155 PetscCall(VecRestoreArrayRead(bb, &b)); 1156 PetscCall(VecRestoreArray(xx, &x)); 1157 PetscCall(PetscLogFlops(2.0 * a->nz)); 1158 PetscFunctionReturn(PETSC_SUCCESS); 1159 } 1160 1161 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx) 1162 { 1163 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1164 IS iscol = a->col, isrow = a->row; 1165 PetscInt i, n = A->rmap->n, j; 1166 PetscInt nz; 1167 const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag; 1168 PetscScalar *x, *tmp, sum; 1169 const PetscScalar *b; 1170 const MatScalar *aa, *v; 1171 1172 PetscFunctionBegin; 1173 if (yy != xx) PetscCall(VecCopy(yy, xx)); 1174 1175 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1176 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1177 PetscCall(VecGetArrayRead(bb, &b)); 1178 PetscCall(VecGetArray(xx, &x)); 1179 tmp = a->solve_work; 1180 1181 PetscCall(ISGetIndices(isrow, &rout)); 1182 r = rout; 1183 PetscCall(ISGetIndices(iscol, &cout)); 1184 c = cout; 1185 1186 /* forward solve the lower triangular */ 1187 tmp[0] = b[r[0]]; 1188 v = aa; 1189 vi = aj; 1190 for (i = 1; i < n; i++) { 1191 nz = ai[i + 1] - ai[i]; 1192 sum = b[r[i]]; 1193 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1194 tmp[i] = sum; 1195 v += nz; 1196 vi += nz; 1197 } 1198 1199 /* backward solve the upper triangular */ 1200 v = aa + adiag[n - 1]; 1201 vi = aj + adiag[n - 1]; 1202 for (i = n - 1; i >= 0; i--) { 1203 nz = adiag[i] - adiag[i + 1] - 1; 1204 sum = tmp[i]; 1205 for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]]; 1206 tmp[i] = sum * v[nz]; 1207 x[c[i]] += tmp[i]; 1208 v += nz + 1; 1209 vi += nz + 1; 1210 } 1211 1212 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 1213 PetscCall(ISRestoreIndices(isrow, &rout)); 1214 PetscCall(ISRestoreIndices(iscol, &cout)); 1215 PetscCall(VecRestoreArrayRead(bb, &b)); 1216 PetscCall(VecRestoreArray(xx, &x)); 1217 PetscCall(PetscLogFlops(2.0 * a->nz)); 1218 PetscFunctionReturn(PETSC_SUCCESS); 1219 } 1220 1221 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx) 1222 { 1223 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1224 IS iscol = a->col, isrow = a->row; 1225 const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 1226 PetscInt i, n = A->rmap->n, j; 1227 PetscInt nz; 1228 PetscScalar *x, *tmp, s1; 1229 const MatScalar *aa, *v; 1230 const PetscScalar *b; 1231 1232 PetscFunctionBegin; 1233 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1234 PetscCall(VecGetArrayRead(bb, &b)); 1235 PetscCall(VecGetArrayWrite(xx, &x)); 1236 tmp = a->solve_work; 1237 1238 PetscCall(ISGetIndices(isrow, &rout)); 1239 r = rout; 1240 PetscCall(ISGetIndices(iscol, &cout)); 1241 c = cout; 1242 1243 /* copy the b into temp work space according to permutation */ 1244 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1245 1246 /* forward solve the U^T */ 1247 for (i = 0; i < n; i++) { 1248 v = aa + diag[i]; 1249 vi = aj + diag[i] + 1; 1250 nz = ai[i + 1] - diag[i] - 1; 1251 s1 = tmp[i]; 1252 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1253 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1254 tmp[i] = s1; 1255 } 1256 1257 /* backward solve the L^T */ 1258 for (i = n - 1; i >= 0; i--) { 1259 v = aa + diag[i] - 1; 1260 vi = aj + diag[i] - 1; 1261 nz = diag[i] - ai[i]; 1262 s1 = tmp[i]; 1263 for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1264 } 1265 1266 /* copy tmp into x according to permutation */ 1267 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1268 1269 PetscCall(ISRestoreIndices(isrow, &rout)); 1270 PetscCall(ISRestoreIndices(iscol, &cout)); 1271 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 1272 PetscCall(VecRestoreArrayRead(bb, &b)); 1273 PetscCall(VecRestoreArrayWrite(xx, &x)); 1274 1275 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1276 PetscFunctionReturn(PETSC_SUCCESS); 1277 } 1278 1279 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx) 1280 { 1281 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1282 IS iscol = a->col, isrow = a->row; 1283 const PetscInt *rout, *cout, *r, *c, *adiag, *ai = a->i, *aj = a->j, *vi; 1284 PetscInt i, n = A->rmap->n, j; 1285 PetscInt nz; 1286 PetscScalar *x, *tmp, s1; 1287 const MatScalar *aa, *v; 1288 const PetscScalar *b; 1289 1290 PetscFunctionBegin; 1291 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1292 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1293 PetscCall(VecGetArrayRead(bb, &b)); 1294 PetscCall(VecGetArrayWrite(xx, &x)); 1295 tmp = a->solve_work; 1296 1297 PetscCall(ISGetIndices(isrow, &rout)); 1298 r = rout; 1299 PetscCall(ISGetIndices(iscol, &cout)); 1300 c = cout; 1301 1302 /* copy the b into temp work space according to permutation */ 1303 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1304 1305 /* forward solve the U^T */ 1306 for (i = 0; i < n; i++) { 1307 v = aa + adiag[i + 1] + 1; 1308 vi = aj + adiag[i + 1] + 1; 1309 nz = adiag[i] - adiag[i + 1] - 1; 1310 s1 = tmp[i]; 1311 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1312 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1313 tmp[i] = s1; 1314 } 1315 1316 /* backward solve the L^T */ 1317 for (i = n - 1; i >= 0; i--) { 1318 v = aa + ai[i]; 1319 vi = aj + ai[i]; 1320 nz = ai[i + 1] - ai[i]; 1321 s1 = tmp[i]; 1322 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1323 } 1324 1325 /* copy tmp into x according to permutation */ 1326 for (i = 0; i < n; i++) x[r[i]] = tmp[i]; 1327 1328 PetscCall(ISRestoreIndices(isrow, &rout)); 1329 PetscCall(ISRestoreIndices(iscol, &cout)); 1330 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 1331 PetscCall(VecRestoreArrayRead(bb, &b)); 1332 PetscCall(VecRestoreArrayWrite(xx, &x)); 1333 1334 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1335 PetscFunctionReturn(PETSC_SUCCESS); 1336 } 1337 1338 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx) 1339 { 1340 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1341 IS iscol = a->col, isrow = a->row; 1342 const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi; 1343 PetscInt i, n = A->rmap->n, j; 1344 PetscInt nz; 1345 PetscScalar *x, *tmp, s1; 1346 const MatScalar *aa, *v; 1347 const PetscScalar *b; 1348 1349 PetscFunctionBegin; 1350 if (zz != xx) PetscCall(VecCopy(zz, xx)); 1351 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1352 PetscCall(VecGetArrayRead(bb, &b)); 1353 PetscCall(VecGetArray(xx, &x)); 1354 tmp = a->solve_work; 1355 1356 PetscCall(ISGetIndices(isrow, &rout)); 1357 r = rout; 1358 PetscCall(ISGetIndices(iscol, &cout)); 1359 c = cout; 1360 1361 /* copy the b into temp work space according to permutation */ 1362 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1363 1364 /* forward solve the U^T */ 1365 for (i = 0; i < n; i++) { 1366 v = aa + diag[i]; 1367 vi = aj + diag[i] + 1; 1368 nz = ai[i + 1] - diag[i] - 1; 1369 s1 = tmp[i]; 1370 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1371 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1372 tmp[i] = s1; 1373 } 1374 1375 /* backward solve the L^T */ 1376 for (i = n - 1; i >= 0; i--) { 1377 v = aa + diag[i] - 1; 1378 vi = aj + diag[i] - 1; 1379 nz = diag[i] - ai[i]; 1380 s1 = tmp[i]; 1381 for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j]; 1382 } 1383 1384 /* copy tmp into x according to permutation */ 1385 for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1386 1387 PetscCall(ISRestoreIndices(isrow, &rout)); 1388 PetscCall(ISRestoreIndices(iscol, &cout)); 1389 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 1390 PetscCall(VecRestoreArrayRead(bb, &b)); 1391 PetscCall(VecRestoreArray(xx, &x)); 1392 1393 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1394 PetscFunctionReturn(PETSC_SUCCESS); 1395 } 1396 1397 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx) 1398 { 1399 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1400 IS iscol = a->col, isrow = a->row; 1401 const PetscInt *rout, *cout, *r, *c, *adiag, *ai = a->i, *aj = a->j, *vi; 1402 PetscInt i, n = A->rmap->n, j; 1403 PetscInt nz; 1404 PetscScalar *x, *tmp, s1; 1405 const MatScalar *aa, *v; 1406 const PetscScalar *b; 1407 1408 PetscFunctionBegin; 1409 if (zz != xx) PetscCall(VecCopy(zz, xx)); 1410 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1411 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1412 PetscCall(VecGetArrayRead(bb, &b)); 1413 PetscCall(VecGetArray(xx, &x)); 1414 tmp = a->solve_work; 1415 1416 PetscCall(ISGetIndices(isrow, &rout)); 1417 r = rout; 1418 PetscCall(ISGetIndices(iscol, &cout)); 1419 c = cout; 1420 1421 /* copy the b into temp work space according to permutation */ 1422 for (i = 0; i < n; i++) tmp[i] = b[c[i]]; 1423 1424 /* forward solve the U^T */ 1425 for (i = 0; i < n; i++) { 1426 v = aa + adiag[i + 1] + 1; 1427 vi = aj + adiag[i + 1] + 1; 1428 nz = adiag[i] - adiag[i + 1] - 1; 1429 s1 = tmp[i]; 1430 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1431 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1432 tmp[i] = s1; 1433 } 1434 1435 /* backward solve the L^T */ 1436 for (i = n - 1; i >= 0; i--) { 1437 v = aa + ai[i]; 1438 vi = aj + ai[i]; 1439 nz = ai[i + 1] - ai[i]; 1440 s1 = tmp[i]; 1441 for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j]; 1442 } 1443 1444 /* copy tmp into x according to permutation */ 1445 for (i = 0; i < n; i++) x[r[i]] += tmp[i]; 1446 1447 PetscCall(ISRestoreIndices(isrow, &rout)); 1448 PetscCall(ISRestoreIndices(iscol, &cout)); 1449 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 1450 PetscCall(VecRestoreArrayRead(bb, &b)); 1451 PetscCall(VecRestoreArray(xx, &x)); 1452 1453 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 1454 PetscFunctionReturn(PETSC_SUCCESS); 1455 } 1456 1457 /* 1458 ilu() under revised new data structure. 1459 Factored arrays bj and ba are stored as 1460 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1461 1462 bi=fact->i is an array of size n+1, in which 1463 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1464 bi[n]: points to L(n-1,n-1)+1 1465 1466 bdiag=fact->diag is an array of size n+1,in which 1467 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1468 bdiag[n]: points to entry of U(n-1,0)-1 1469 1470 U(i,:) contains bdiag[i] as its last entry, i.e., 1471 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1472 */ 1473 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1474 { 1475 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1476 const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag; 1477 PetscInt i, j, k = 0, nz, *bi, *bj, *bdiag; 1478 IS isicol; 1479 1480 PetscFunctionBegin; 1481 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1482 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1483 PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE)); 1484 b = (Mat_SeqAIJ *)fact->data; 1485 1486 /* allocate matrix arrays for new data structure */ 1487 PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscScalar), (void **)&b->a)); 1488 PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscInt), (void **)&b->j)); 1489 PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&b->i)); 1490 if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n])); 1491 b->free_a = PETSC_TRUE; 1492 b->free_ij = PETSC_TRUE; 1493 1494 if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag)); 1495 bdiag = b->diag; 1496 1497 /* set bi and bj with new data structure */ 1498 bi = b->i; 1499 bj = b->j; 1500 1501 /* L part */ 1502 bi[0] = 0; 1503 for (i = 0; i < n; i++) { 1504 nz = adiag[i] - ai[i]; 1505 bi[i + 1] = bi[i] + nz; 1506 aj = a->j + ai[i]; 1507 for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1508 } 1509 1510 /* U part */ 1511 bdiag[n] = bi[n] - 1; 1512 for (i = n - 1; i >= 0; i--) { 1513 nz = ai[i + 1] - adiag[i] - 1; 1514 aj = a->j + adiag[i] + 1; 1515 for (j = 0; j < nz; j++) bj[k++] = aj[j]; 1516 /* diag[i] */ 1517 bj[k++] = i; 1518 bdiag[i] = bdiag[i + 1] + nz + 1; 1519 } 1520 1521 fact->factortype = MAT_FACTOR_ILU; 1522 fact->info.factor_mallocs = 0; 1523 fact->info.fill_ratio_given = info->fill; 1524 fact->info.fill_ratio_needed = 1.0; 1525 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1526 PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1527 1528 b = (Mat_SeqAIJ *)fact->data; 1529 b->row = isrow; 1530 b->col = iscol; 1531 b->icol = isicol; 1532 PetscCall(PetscMalloc1(fact->rmap->n, &b->solve_work)); 1533 PetscCall(PetscObjectReference((PetscObject)isrow)); 1534 PetscCall(PetscObjectReference((PetscObject)iscol)); 1535 PetscFunctionReturn(PETSC_SUCCESS); 1536 } 1537 1538 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 1539 { 1540 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b; 1541 IS isicol; 1542 const PetscInt *r, *ic; 1543 PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j; 1544 PetscInt *bi, *cols, nnz, *cols_lvl; 1545 PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0; 1546 PetscInt i, levels, diagonal_fill; 1547 PetscBool col_identity, row_identity; 1548 PetscReal f; 1549 PetscInt nlnk, *lnk, *lnk_lvl = NULL; 1550 PetscBT lnkbt; 1551 PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr; 1552 PetscFreeSpaceList free_space = NULL, current_space = NULL; 1553 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 1554 PetscBool diagDense; 1555 1556 PetscFunctionBegin; 1557 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 1558 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense)); 1559 PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries"); 1560 1561 levels = (PetscInt)info->levels; 1562 PetscCall(ISIdentity(isrow, &row_identity)); 1563 PetscCall(ISIdentity(iscol, &col_identity)); 1564 if (!levels && row_identity && col_identity) { 1565 /* special case: ilu(0) with natural ordering */ 1566 PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info)); 1567 if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1568 PetscFunctionReturn(PETSC_SUCCESS); 1569 } 1570 1571 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1572 PetscCall(ISGetIndices(isrow, &r)); 1573 PetscCall(ISGetIndices(isicol, &ic)); 1574 1575 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1576 PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi)); 1577 PetscCall(PetscMalloc1(n + 1, &bdiag)); 1578 bi[0] = bdiag[0] = 0; 1579 PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr)); 1580 1581 /* create a linked list for storing column indices of the active row */ 1582 nlnk = n + 1; 1583 PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt)); 1584 1585 /* initial FreeSpace size is f*(ai[n]+1) */ 1586 f = info->fill; 1587 diagonal_fill = (PetscInt)info->diagonal_fill; 1588 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 1589 current_space = free_space; 1590 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl)); 1591 current_space_lvl = free_space_lvl; 1592 for (i = 0; i < n; i++) { 1593 nzi = 0; 1594 /* copy current row into linked list */ 1595 nnz = ai[r[i] + 1] - ai[r[i]]; 1596 PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i); 1597 cols = aj + ai[r[i]]; 1598 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1599 PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt)); 1600 nzi += nlnk; 1601 1602 /* make sure diagonal entry is included */ 1603 if (diagonal_fill && lnk[i] == -1) { 1604 fm = n; 1605 while (lnk[fm] < i) fm = lnk[fm]; 1606 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1607 lnk[fm] = i; 1608 lnk_lvl[i] = 0; 1609 nzi++; 1610 dcount++; 1611 } 1612 1613 /* add pivot rows into the active row */ 1614 nzbd = 0; 1615 prow = lnk[n]; 1616 while (prow < i) { 1617 nnz = bdiag[prow]; 1618 cols = bj_ptr[prow] + nnz + 1; 1619 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1620 nnz = bi[prow + 1] - bi[prow] - nnz - 1; 1621 PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow)); 1622 nzi += nlnk; 1623 prow = lnk[prow]; 1624 nzbd++; 1625 } 1626 bdiag[i] = nzbd; 1627 bi[i + 1] = bi[i] + nzi; 1628 /* if free space is not available, make more free space */ 1629 if (current_space->local_remaining < nzi) { 1630 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */ 1631 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 1632 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl)); 1633 reallocs++; 1634 } 1635 1636 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1637 PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 1638 bj_ptr[i] = current_space->array; 1639 bjlvl_ptr[i] = current_space_lvl->array; 1640 1641 /* make sure the active row i has diagonal entry */ 1642 PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i); 1643 1644 current_space->array += nzi; 1645 current_space->local_used += nzi; 1646 current_space->local_remaining -= nzi; 1647 current_space_lvl->array += nzi; 1648 current_space_lvl->local_used += nzi; 1649 current_space_lvl->local_remaining -= nzi; 1650 } 1651 1652 PetscCall(ISRestoreIndices(isrow, &r)); 1653 PetscCall(ISRestoreIndices(isicol, &ic)); 1654 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1655 PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj)); 1656 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 1657 1658 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 1659 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 1660 PetscCall(PetscFree2(bj_ptr, bjlvl_ptr)); 1661 1662 #if defined(PETSC_USE_INFO) 1663 { 1664 PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 1665 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 1666 PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af)); 1667 PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af)); 1668 PetscCall(PetscInfo(A, "for best performance.\n")); 1669 if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount)); 1670 } 1671 #endif 1672 /* put together the new matrix */ 1673 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL)); 1674 b = (Mat_SeqAIJ *)fact->data; 1675 b->free_ij = PETSC_TRUE; 1676 PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a)); 1677 b->free_a = PETSC_TRUE; 1678 b->j = bj; 1679 b->i = bi; 1680 b->diag = bdiag; 1681 b->ilen = NULL; 1682 b->imax = NULL; 1683 b->row = isrow; 1684 b->col = iscol; 1685 PetscCall(PetscObjectReference((PetscObject)isrow)); 1686 PetscCall(PetscObjectReference((PetscObject)iscol)); 1687 b->icol = isicol; 1688 1689 PetscCall(PetscMalloc1(n, &b->solve_work)); 1690 /* In b structure: Free imax, ilen, old a, old j. 1691 Allocate bdiag, solve_work, new a, new j */ 1692 b->maxnz = b->nz = bdiag[0] + 1; 1693 1694 fact->info.factor_mallocs = reallocs; 1695 fact->info.fill_ratio_given = f; 1696 fact->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 1697 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1698 if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1699 PetscCall(MatSeqAIJCheckInode_FactorLU(fact)); 1700 PetscFunctionReturn(PETSC_SUCCESS); 1701 } 1702 1703 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info) 1704 { 1705 Mat C = B; 1706 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1707 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 1708 IS ip = b->row, iip = b->icol; 1709 const PetscInt *rip, *riip; 1710 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp; 1711 PetscInt *ai = a->i, *aj = a->j; 1712 PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz; 1713 MatScalar *rtmp, *ba = b->a, *bval, dk, uikdi; 1714 PetscBool perm_identity; 1715 FactorShiftCtx sctx; 1716 PetscReal rs; 1717 const MatScalar *aa, *v; 1718 MatScalar d; 1719 const PetscInt *adiag; 1720 1721 PetscFunctionBegin; 1722 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1723 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1724 /* MatPivotSetUp(): initialize shift context sctx */ 1725 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 1726 1727 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 1728 sctx.shift_top = info->zeropivot; 1729 for (i = 0; i < mbs; i++) { 1730 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 1731 d = aa[adiag[i]]; 1732 rs = -PetscAbsScalar(d) - PetscRealPart(d); 1733 v = aa + ai[i]; 1734 nz = ai[i + 1] - ai[i]; 1735 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 1736 if (rs > sctx.shift_top) sctx.shift_top = rs; 1737 } 1738 sctx.shift_top *= 1.1; 1739 sctx.nshift_max = 5; 1740 sctx.shift_lo = 0.; 1741 sctx.shift_hi = 1.; 1742 } 1743 1744 PetscCall(ISGetIndices(ip, &rip)); 1745 PetscCall(ISGetIndices(iip, &riip)); 1746 1747 /* allocate working arrays 1748 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 1749 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 1750 */ 1751 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r)); 1752 1753 do { 1754 sctx.newshift = PETSC_FALSE; 1755 1756 for (i = 0; i < mbs; i++) c2r[i] = mbs; 1757 if (mbs) il[0] = 0; 1758 1759 for (k = 0; k < mbs; k++) { 1760 /* zero rtmp */ 1761 nz = bi[k + 1] - bi[k]; 1762 bjtmp = bj + bi[k]; 1763 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 1764 1765 /* load in initial unfactored row */ 1766 bval = ba + bi[k]; 1767 jmin = ai[rip[k]]; 1768 jmax = ai[rip[k] + 1]; 1769 for (j = jmin; j < jmax; j++) { 1770 col = riip[aj[j]]; 1771 if (col >= k) { /* only take upper triangular entry */ 1772 rtmp[col] = aa[j]; 1773 *bval++ = 0.0; /* for in-place factorization */ 1774 } 1775 } 1776 /* shift the diagonal of the matrix: ZeropivotApply() */ 1777 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 1778 1779 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1780 dk = rtmp[k]; 1781 i = c2r[k]; /* first row to be added to k_th row */ 1782 1783 while (i < k) { 1784 nexti = c2r[i]; /* next row to be added to k_th row */ 1785 1786 /* compute multiplier, update diag(k) and U(i,k) */ 1787 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1788 uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */ 1789 dk += uikdi * ba[ili]; /* update diag[k] */ 1790 ba[ili] = uikdi; /* -U(i,k) */ 1791 1792 /* add multiple of row i to k-th row */ 1793 jmin = ili + 1; 1794 jmax = bi[i + 1]; 1795 if (jmin < jmax) { 1796 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 1797 /* update il and c2r for row i */ 1798 il[i] = jmin; 1799 j = bj[jmin]; 1800 c2r[i] = c2r[j]; 1801 c2r[j] = i; 1802 } 1803 i = nexti; 1804 } 1805 1806 /* copy data into U(k,:) */ 1807 rs = 0.0; 1808 jmin = bi[k]; 1809 jmax = bi[k + 1] - 1; 1810 if (jmin < jmax) { 1811 for (j = jmin; j < jmax; j++) { 1812 col = bj[j]; 1813 ba[j] = rtmp[col]; 1814 rs += PetscAbsScalar(ba[j]); 1815 } 1816 /* add the k-th row into il and c2r */ 1817 il[k] = jmin; 1818 i = bj[jmin]; 1819 c2r[k] = c2r[i]; 1820 c2r[i] = k; 1821 } 1822 1823 /* MatPivotCheck() */ 1824 sctx.rs = rs; 1825 sctx.pv = dk; 1826 PetscCall(MatPivotCheck(B, A, info, &sctx, i)); 1827 if (sctx.newshift) break; 1828 dk = sctx.pv; 1829 1830 ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */ 1831 } 1832 } while (sctx.newshift); 1833 1834 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 1835 PetscCall(PetscFree3(rtmp, il, c2r)); 1836 PetscCall(ISRestoreIndices(ip, &rip)); 1837 PetscCall(ISRestoreIndices(iip, &riip)); 1838 1839 PetscCall(ISIdentity(ip, &perm_identity)); 1840 if (perm_identity) { 1841 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1842 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1843 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1844 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1845 } else { 1846 B->ops->solve = MatSolve_SeqSBAIJ_1; 1847 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1848 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 1849 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 1850 } 1851 1852 C->assembled = PETSC_TRUE; 1853 C->preallocated = PETSC_TRUE; 1854 1855 PetscCall(PetscLogFlops(C->rmap->n)); 1856 1857 /* MatPivotView() */ 1858 if (sctx.nshift) { 1859 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 1860 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 1861 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 1862 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1863 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 1864 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 1865 } 1866 } 1867 PetscFunctionReturn(PETSC_SUCCESS); 1868 } 1869 1870 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info) 1871 { 1872 Mat C = B; 1873 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 1874 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data; 1875 IS ip = b->row, iip = b->icol; 1876 const PetscInt *rip, *riip; 1877 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp; 1878 PetscInt *ai = a->i, *aj = a->j; 1879 PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz; 1880 MatScalar *rtmp, *ba = b->a, *bval, dk, uikdi; 1881 const MatScalar *aa, *v; 1882 PetscBool perm_identity; 1883 FactorShiftCtx sctx; 1884 PetscReal rs; 1885 MatScalar d; 1886 const PetscInt *adiag; 1887 1888 PetscFunctionBegin; 1889 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 1890 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 1891 /* MatPivotSetUp(): initialize shift context sctx */ 1892 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 1893 1894 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 1895 sctx.shift_top = info->zeropivot; 1896 for (i = 0; i < mbs; i++) { 1897 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 1898 d = aa[adiag[i]]; 1899 rs = -PetscAbsScalar(d) - PetscRealPart(d); 1900 v = aa + ai[i]; 1901 nz = ai[i + 1] - ai[i]; 1902 for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]); 1903 if (rs > sctx.shift_top) sctx.shift_top = rs; 1904 } 1905 sctx.shift_top *= 1.1; 1906 sctx.nshift_max = 5; 1907 sctx.shift_lo = 0.; 1908 sctx.shift_hi = 1.; 1909 } 1910 1911 PetscCall(ISGetIndices(ip, &rip)); 1912 PetscCall(ISGetIndices(iip, &riip)); 1913 1914 /* initialization */ 1915 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl)); 1916 1917 do { 1918 sctx.newshift = PETSC_FALSE; 1919 1920 for (i = 0; i < mbs; i++) jl[i] = mbs; 1921 il[0] = 0; 1922 1923 for (k = 0; k < mbs; k++) { 1924 /* zero rtmp */ 1925 nz = bi[k + 1] - bi[k]; 1926 bjtmp = bj + bi[k]; 1927 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 1928 1929 bval = ba + bi[k]; 1930 /* initialize k-th row by the perm[k]-th row of A */ 1931 jmin = ai[rip[k]]; 1932 jmax = ai[rip[k] + 1]; 1933 for (j = jmin; j < jmax; j++) { 1934 col = riip[aj[j]]; 1935 if (col >= k) { /* only take upper triangular entry */ 1936 rtmp[col] = aa[j]; 1937 *bval++ = 0.0; /* for in-place factorization */ 1938 } 1939 } 1940 /* shift the diagonal of the matrix */ 1941 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1942 1943 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1944 dk = rtmp[k]; 1945 i = jl[k]; /* first row to be added to k_th row */ 1946 1947 while (i < k) { 1948 nexti = jl[i]; /* next row to be added to k_th row */ 1949 1950 /* compute multiplier, update diag(k) and U(i,k) */ 1951 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1952 uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */ 1953 dk += uikdi * ba[ili]; 1954 ba[ili] = uikdi; /* -U(i,k) */ 1955 1956 /* add multiple of row i to k-th row */ 1957 jmin = ili + 1; 1958 jmax = bi[i + 1]; 1959 if (jmin < jmax) { 1960 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 1961 /* update il and jl for row i */ 1962 il[i] = jmin; 1963 j = bj[jmin]; 1964 jl[i] = jl[j]; 1965 jl[j] = i; 1966 } 1967 i = nexti; 1968 } 1969 1970 /* shift the diagonals when zero pivot is detected */ 1971 /* compute rs=sum of abs(off-diagonal) */ 1972 rs = 0.0; 1973 jmin = bi[k] + 1; 1974 nz = bi[k + 1] - jmin; 1975 bcol = bj + jmin; 1976 for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]); 1977 1978 sctx.rs = rs; 1979 sctx.pv = dk; 1980 PetscCall(MatPivotCheck(B, A, info, &sctx, k)); 1981 if (sctx.newshift) break; 1982 dk = sctx.pv; 1983 1984 /* copy data into U(k,:) */ 1985 ba[bi[k]] = 1.0 / dk; /* U(k,k) */ 1986 jmin = bi[k] + 1; 1987 jmax = bi[k + 1]; 1988 if (jmin < jmax) { 1989 for (j = jmin; j < jmax; j++) { 1990 col = bj[j]; 1991 ba[j] = rtmp[col]; 1992 } 1993 /* add the k-th row into il and jl */ 1994 il[k] = jmin; 1995 i = bj[jmin]; 1996 jl[k] = jl[i]; 1997 jl[i] = k; 1998 } 1999 } 2000 } while (sctx.newshift); 2001 2002 PetscCall(PetscFree3(rtmp, il, jl)); 2003 PetscCall(ISRestoreIndices(ip, &rip)); 2004 PetscCall(ISRestoreIndices(iip, &riip)); 2005 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 2006 2007 PetscCall(ISIdentity(ip, &perm_identity)); 2008 if (perm_identity) { 2009 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2010 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2011 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2012 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2013 } else { 2014 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2015 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2016 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2017 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2018 } 2019 2020 C->assembled = PETSC_TRUE; 2021 C->preallocated = PETSC_TRUE; 2022 2023 PetscCall(PetscLogFlops(C->rmap->n)); 2024 if (sctx.nshift) { 2025 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2026 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2027 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2028 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 2029 } 2030 } 2031 PetscFunctionReturn(PETSC_SUCCESS); 2032 } 2033 2034 /* 2035 icc() under revised new data structure. 2036 Factored arrays bj and ba are stored as 2037 U(0,:),...,U(i,:),U(n-1,:) 2038 2039 ui=fact->i is an array of size n+1, in which 2040 ui+ 2041 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2042 ui[n]: points to U(n-1,n-1)+1 2043 2044 udiag=fact->diag is an array of size n,in which 2045 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2046 2047 U(i,:) contains udiag[i] as its last entry, i.e., 2048 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2049 */ 2050 2051 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2052 { 2053 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2054 Mat_SeqSBAIJ *b; 2055 PetscBool perm_identity; 2056 PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag; 2057 const PetscInt *rip, *riip, *adiag; 2058 PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow; 2059 PetscInt nlnk, *lnk, *lnk_lvl = NULL; 2060 PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr; 2061 PetscReal fill = info->fill, levels = info->levels; 2062 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2063 PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL; 2064 PetscBT lnkbt; 2065 IS iperm; 2066 PetscBool diagDense; 2067 2068 PetscFunctionBegin; 2069 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2070 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense)); 2071 PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries"); 2072 PetscCall(ISIdentity(perm, &perm_identity)); 2073 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2074 2075 PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui)); 2076 PetscCall(PetscMalloc1(am + 1, &udiag)); 2077 ui[0] = 0; 2078 2079 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2080 if (!levels && perm_identity) { 2081 for (i = 0; i < am; i++) { 2082 ncols = ai[i + 1] - adiag[i]; 2083 ui[i + 1] = ui[i] + ncols; 2084 udiag[i] = ui[i + 1] - 1; /* points to the last entry of U(i,:) */ 2085 } 2086 PetscCall(PetscMalloc1(ui[am] + 1, &uj)); 2087 cols = uj; 2088 for (i = 0; i < am; i++) { 2089 aj = a->j + adiag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2090 ncols = ai[i + 1] - adiag[i] - 1; 2091 for (j = 0; j < ncols; j++) *cols++ = aj[j]; 2092 *cols++ = i; /* diagonal is located as the last entry of U(i,:) */ 2093 } 2094 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2095 PetscCall(ISGetIndices(iperm, &riip)); 2096 PetscCall(ISGetIndices(perm, &rip)); 2097 2098 /* initialization */ 2099 PetscCall(PetscMalloc1(am + 1, &ajtmp)); 2100 2101 /* jl: linked list for storing indices of the pivot rows 2102 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2103 PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il)); 2104 for (i = 0; i < am; i++) { 2105 jl[i] = am; 2106 il[i] = 0; 2107 } 2108 2109 /* create and initialize a linked list for storing column indices of the active row k */ 2110 nlnk = am + 1; 2111 PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt)); 2112 2113 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2114 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 2115 current_space = free_space; 2116 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl)); 2117 current_space_lvl = free_space_lvl; 2118 2119 for (k = 0; k < am; k++) { /* for each active row k */ 2120 /* initialize lnk by the column indices of row rip[k] of A */ 2121 nzk = 0; 2122 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2123 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2124 ncols_upper = 0; 2125 for (j = 0; j < ncols; j++) { 2126 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2127 if (riip[i] >= k) { /* only take upper triangular entry */ 2128 ajtmp[ncols_upper] = i; 2129 ncols_upper++; 2130 } 2131 } 2132 PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt)); 2133 nzk += nlnk; 2134 2135 /* update lnk by computing fill-in for each pivot row to be merged in */ 2136 prow = jl[k]; /* 1st pivot row */ 2137 2138 while (prow < k) { 2139 nextprow = jl[prow]; 2140 2141 /* merge prow into k-th row */ 2142 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2143 jmax = ui[prow + 1]; 2144 ncols = jmax - jmin; 2145 i = jmin - ui[prow]; 2146 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2147 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2148 j = *(uj - 1); 2149 PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j)); 2150 nzk += nlnk; 2151 2152 /* update il and jl for prow */ 2153 if (jmin < jmax) { 2154 il[prow] = jmin; 2155 j = *cols; 2156 jl[prow] = jl[j]; 2157 jl[j] = prow; 2158 } 2159 prow = nextprow; 2160 } 2161 2162 /* if free space is not available, make more free space */ 2163 if (current_space->local_remaining < nzk) { 2164 i = am - k + 1; /* num of unfactored rows */ 2165 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2166 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2167 PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl)); 2168 reallocs++; 2169 } 2170 2171 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2172 PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k); 2173 PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt)); 2174 2175 /* add the k-th row into il and jl */ 2176 if (nzk > 1) { 2177 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2178 jl[k] = jl[i]; 2179 jl[i] = k; 2180 il[k] = ui[k] + 1; 2181 } 2182 uj_ptr[k] = current_space->array; 2183 uj_lvl_ptr[k] = current_space_lvl->array; 2184 2185 current_space->array += nzk; 2186 current_space->local_used += nzk; 2187 current_space->local_remaining -= nzk; 2188 2189 current_space_lvl->array += nzk; 2190 current_space_lvl->local_used += nzk; 2191 current_space_lvl->local_remaining -= nzk; 2192 2193 ui[k + 1] = ui[k] + nzk; 2194 } 2195 2196 PetscCall(ISRestoreIndices(perm, &rip)); 2197 PetscCall(ISRestoreIndices(iperm, &riip)); 2198 PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il)); 2199 PetscCall(PetscFree(ajtmp)); 2200 2201 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2202 PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj)); 2203 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 2204 PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt)); 2205 PetscCall(PetscFreeSpaceDestroy(free_space_lvl)); 2206 2207 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2208 2209 /* put together the new matrix in MATSEQSBAIJ format */ 2210 b = (Mat_SeqSBAIJ *)fact->data; 2211 b->free_ij = PETSC_TRUE; 2212 PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a)); 2213 b->free_a = PETSC_TRUE; 2214 b->j = uj; 2215 b->i = ui; 2216 b->diag = udiag; 2217 b->ilen = NULL; 2218 b->imax = NULL; 2219 b->row = perm; 2220 b->col = perm; 2221 PetscCall(PetscObjectReference((PetscObject)perm)); 2222 PetscCall(PetscObjectReference((PetscObject)perm)); 2223 b->icol = iperm; 2224 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2225 2226 PetscCall(PetscMalloc1(am, &b->solve_work)); 2227 2228 b->maxnz = b->nz = ui[am]; 2229 2230 fact->info.factor_mallocs = reallocs; 2231 fact->info.fill_ratio_given = fill; 2232 if (ai[am] != 0) { 2233 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2234 fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 2235 } else { 2236 fact->info.fill_ratio_needed = 0.0; 2237 } 2238 #if defined(PETSC_USE_INFO) 2239 if (ai[am] != 0) { 2240 PetscReal af = fact->info.fill_ratio_needed; 2241 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2242 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2243 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2244 } else { 2245 PetscCall(PetscInfo(A, "Empty matrix\n")); 2246 } 2247 #endif 2248 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2249 PetscFunctionReturn(PETSC_SUCCESS); 2250 } 2251 2252 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 2253 { 2254 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2255 Mat_SeqSBAIJ *b; 2256 PetscBool perm_identity; 2257 PetscReal fill = info->fill; 2258 const PetscInt *rip, *riip; 2259 PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow; 2260 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 2261 PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag; 2262 PetscFreeSpaceList free_space = NULL, current_space = NULL; 2263 PetscBT lnkbt; 2264 IS iperm; 2265 PetscBool diagDense; 2266 2267 PetscFunctionBegin; 2268 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 2269 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense)); 2270 PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries"); 2271 2272 /* check whether perm is the identity mapping */ 2273 PetscCall(ISIdentity(perm, &perm_identity)); 2274 PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm)); 2275 PetscCall(ISGetIndices(iperm, &riip)); 2276 PetscCall(ISGetIndices(perm, &rip)); 2277 2278 /* initialization */ 2279 PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui)); 2280 PetscCall(PetscMalloc1(am + 1, &udiag)); 2281 ui[0] = 0; 2282 2283 /* jl: linked list for storing indices of the pivot rows 2284 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2285 PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols)); 2286 for (i = 0; i < am; i++) { 2287 jl[i] = am; 2288 il[i] = 0; 2289 } 2290 2291 /* create and initialize a linked list for storing column indices of the active row k */ 2292 nlnk = am + 1; 2293 PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt)); 2294 2295 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2296 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space)); 2297 current_space = free_space; 2298 2299 for (k = 0; k < am; k++) { /* for each active row k */ 2300 /* initialize lnk by the column indices of row rip[k] of A */ 2301 nzk = 0; 2302 ncols = ai[rip[k] + 1] - ai[rip[k]]; 2303 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k); 2304 ncols_upper = 0; 2305 for (j = 0; j < ncols; j++) { 2306 i = riip[*(aj + ai[rip[k]] + j)]; 2307 if (i >= k) { /* only take upper triangular entry */ 2308 cols[ncols_upper] = i; 2309 ncols_upper++; 2310 } 2311 } 2312 PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt)); 2313 nzk += nlnk; 2314 2315 /* update lnk by computing fill-in for each pivot row to be merged in */ 2316 prow = jl[k]; /* 1st pivot row */ 2317 2318 while (prow < k) { 2319 nextprow = jl[prow]; 2320 /* merge prow into k-th row */ 2321 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2322 jmax = ui[prow + 1]; 2323 ncols = jmax - jmin; 2324 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2325 PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt)); 2326 nzk += nlnk; 2327 2328 /* update il and jl for prow */ 2329 if (jmin < jmax) { 2330 il[prow] = jmin; 2331 j = *uj_ptr; 2332 jl[prow] = jl[j]; 2333 jl[j] = prow; 2334 } 2335 prow = nextprow; 2336 } 2337 2338 /* if free space is not available, make more free space */ 2339 if (current_space->local_remaining < nzk) { 2340 i = am - k + 1; /* num of unfactored rows */ 2341 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2342 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 2343 reallocs++; 2344 } 2345 2346 /* copy data into free space, then initialize lnk */ 2347 PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt)); 2348 2349 /* add the k-th row into il and jl */ 2350 if (nzk > 1) { 2351 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2352 jl[k] = jl[i]; 2353 jl[i] = k; 2354 il[k] = ui[k] + 1; 2355 } 2356 ui_ptr[k] = current_space->array; 2357 2358 current_space->array += nzk; 2359 current_space->local_used += nzk; 2360 current_space->local_remaining -= nzk; 2361 2362 ui[k + 1] = ui[k] + nzk; 2363 } 2364 2365 PetscCall(ISRestoreIndices(perm, &rip)); 2366 PetscCall(ISRestoreIndices(iperm, &riip)); 2367 PetscCall(PetscFree4(ui_ptr, jl, il, cols)); 2368 2369 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2370 PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscInt), (void **)&uj)); 2371 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */ 2372 PetscCall(PetscLLDestroy(lnk, lnkbt)); 2373 2374 /* put together the new matrix in MATSEQSBAIJ format */ 2375 b = (Mat_SeqSBAIJ *)fact->data; 2376 b->free_ij = PETSC_TRUE; 2377 PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a)); 2378 b->free_a = PETSC_TRUE; 2379 b->j = uj; 2380 b->i = ui; 2381 b->diag = udiag; 2382 b->ilen = NULL; 2383 b->imax = NULL; 2384 b->row = perm; 2385 b->col = perm; 2386 2387 PetscCall(PetscObjectReference((PetscObject)perm)); 2388 PetscCall(PetscObjectReference((PetscObject)perm)); 2389 2390 b->icol = iperm; 2391 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2392 2393 PetscCall(PetscMalloc1(am, &b->solve_work)); 2394 2395 b->maxnz = b->nz = ui[am]; 2396 2397 fact->info.factor_mallocs = reallocs; 2398 fact->info.fill_ratio_given = fill; 2399 if (ai[am] != 0) { 2400 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2401 fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am); 2402 } else { 2403 fact->info.fill_ratio_needed = 0.0; 2404 } 2405 #if defined(PETSC_USE_INFO) 2406 if (ai[am] != 0) { 2407 PetscReal af = fact->info.fill_ratio_needed; 2408 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 2409 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2410 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 2411 } else { 2412 PetscCall(PetscInfo(A, "Empty matrix\n")); 2413 } 2414 #endif 2415 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2416 PetscFunctionReturn(PETSC_SUCCESS); 2417 } 2418 2419 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx) 2420 { 2421 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2422 PetscInt n = A->rmap->n; 2423 const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi; 2424 PetscScalar *x, sum; 2425 const PetscScalar *b; 2426 const MatScalar *aa, *v; 2427 PetscInt i, nz; 2428 2429 PetscFunctionBegin; 2430 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 2431 2432 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 2433 PetscCall(VecGetArrayRead(bb, &b)); 2434 PetscCall(VecGetArrayWrite(xx, &x)); 2435 2436 /* forward solve the lower triangular */ 2437 x[0] = b[0]; 2438 v = aa; 2439 vi = aj; 2440 for (i = 1; i < n; i++) { 2441 nz = ai[i + 1] - ai[i]; 2442 sum = b[i]; 2443 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 2444 v += nz; 2445 vi += nz; 2446 x[i] = sum; 2447 } 2448 2449 /* backward solve the upper triangular */ 2450 for (i = n - 1; i >= 0; i--) { 2451 v = aa + adiag[i + 1] + 1; 2452 vi = aj + adiag[i + 1] + 1; 2453 nz = adiag[i] - adiag[i + 1] - 1; 2454 sum = x[i]; 2455 PetscSparseDenseMinusDot(sum, x, v, vi, nz); 2456 x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 2457 } 2458 2459 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 2460 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 2461 PetscCall(VecRestoreArrayRead(bb, &b)); 2462 PetscCall(VecRestoreArrayWrite(xx, &x)); 2463 PetscFunctionReturn(PETSC_SUCCESS); 2464 } 2465 2466 PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx) 2467 { 2468 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 2469 IS iscol = a->col, isrow = a->row; 2470 PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz; 2471 const PetscInt *rout, *cout, *r, *c; 2472 PetscScalar *x, *tmp, sum; 2473 const PetscScalar *b; 2474 const MatScalar *aa, *v; 2475 2476 PetscFunctionBegin; 2477 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 2478 2479 PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 2480 PetscCall(VecGetArrayRead(bb, &b)); 2481 PetscCall(VecGetArrayWrite(xx, &x)); 2482 tmp = a->solve_work; 2483 2484 PetscCall(ISGetIndices(isrow, &rout)); 2485 r = rout; 2486 PetscCall(ISGetIndices(iscol, &cout)); 2487 c = cout; 2488 2489 /* forward solve the lower triangular */ 2490 tmp[0] = b[r[0]]; 2491 v = aa; 2492 vi = aj; 2493 for (i = 1; i < n; i++) { 2494 nz = ai[i + 1] - ai[i]; 2495 sum = b[r[i]]; 2496 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 2497 tmp[i] = sum; 2498 v += nz; 2499 vi += nz; 2500 } 2501 2502 /* backward solve the upper triangular */ 2503 for (i = n - 1; i >= 0; i--) { 2504 v = aa + adiag[i + 1] + 1; 2505 vi = aj + adiag[i + 1] + 1; 2506 nz = adiag[i] - adiag[i + 1] - 1; 2507 sum = tmp[i]; 2508 PetscSparseDenseMinusDot(sum, tmp, v, vi, nz); 2509 x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */ 2510 } 2511 2512 PetscCall(ISRestoreIndices(isrow, &rout)); 2513 PetscCall(ISRestoreIndices(iscol, &cout)); 2514 PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 2515 PetscCall(VecRestoreArrayRead(bb, &b)); 2516 PetscCall(VecRestoreArrayWrite(xx, &x)); 2517 PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n)); 2518 PetscFunctionReturn(PETSC_SUCCESS); 2519 } 2520