1 2 /* 3 spbas_cholesky_row_alloc: 4 in the data arrays, find a place where another row may be stored. 5 Return PETSC_ERR_MEM if there is insufficient space to store the 6 row, so garbage collection and/or re-allocation may be done. 7 */ 8 PetscBool spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz, PetscInt *n_alloc_used) { 9 PetscFunctionBegin; 10 retval.icols[k] = &retval.alloc_icol[*n_alloc_used]; 11 retval.values[k] = &retval.alloc_val[*n_alloc_used]; 12 *n_alloc_used += r_nnz; 13 if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_FALSE); 14 PetscFunctionReturn(PETSC_TRUE); 15 } 16 17 /* 18 spbas_cholesky_garbage_collect: 19 move the rows which have been calculated so far, as well as 20 those currently under construction, to the front of the 21 array, while putting them in the proper order. 22 When it seems necessary, enlarge the current arrays. 23 24 NB: re-allocation is being simulated using 25 PetscMalloc, memcpy, PetscFree, because 26 PetscRealloc does not seem to exist. 27 28 */ 29 PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed. 30 Only the storage, not the contents of this matrix is changed in this function */ 31 PetscInt i_row, /* I : Number of rows for which the final contents are known */ 32 PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final 33 places in the arrays: they need not be moved any more */ 34 PetscInt *n_alloc_used, /* I/O: */ 35 PetscInt *max_row_nnz) /* I : Over-estimate of the number of nonzeros needed to store each row */ 36 { 37 /* PSEUDO-CODE: 38 1. Choose the appropriate size for the arrays 39 2. Rescue the arrays which would be lost during garbage collection 40 3. Reallocate and correct administration 41 4. Move all arrays so that they are in proper order */ 42 43 PetscInt i, j; 44 PetscInt nrows = result->nrows; 45 PetscInt n_alloc_ok = 0; 46 PetscInt n_alloc_ok_max = 0; 47 PetscInt need_already = 0; 48 PetscInt max_need_extra = 0; 49 PetscInt n_alloc_max, n_alloc_est, n_alloc; 50 PetscInt n_alloc_now = result->n_alloc_icol; 51 PetscInt *alloc_icol_old = result->alloc_icol; 52 PetscScalar *alloc_val_old = result->alloc_val; 53 PetscInt *icol_rescue; 54 PetscScalar *val_rescue; 55 PetscInt n_rescue; 56 PetscInt i_here, i_last, n_copy; 57 const PetscReal xtra_perc = 20; 58 59 PetscFunctionBegin; 60 /********************************************************* 61 1. Choose appropriate array size 62 Count number of rows and memory usage which is already final */ 63 for (i = 0; i < i_row; i++) { 64 n_alloc_ok += result->row_nnz[i]; 65 n_alloc_ok_max += max_row_nnz[i]; 66 } 67 68 /* Count currently needed memory usage and future memory requirements 69 (max, predicted)*/ 70 for (i = i_row; i < nrows; i++) { 71 if (!result->row_nnz[i]) { 72 max_need_extra += max_row_nnz[i]; 73 } else { 74 need_already += max_row_nnz[i]; 75 } 76 } 77 78 /* Make maximal and realistic memory requirement estimates */ 79 n_alloc_max = n_alloc_ok + need_already + max_need_extra; 80 n_alloc_est = n_alloc_ok + need_already + (int)(((PetscReal)max_need_extra) * ((PetscReal)n_alloc_ok) / ((PetscReal)n_alloc_ok_max)); 81 82 /* Choose array sizes */ 83 if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max; 84 else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now; 85 else if (n_alloc_max < n_alloc_est * (1 + xtra_perc / 100.0)) n_alloc = n_alloc_max; 86 else n_alloc = (int)(n_alloc_est * (1 + xtra_perc / 100.0)); 87 88 /* If new estimate is less than what we already have, 89 don't reallocate, just garbage-collect */ 90 if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) { n_alloc = result->n_alloc_icol; } 91 92 /* Motivate dimension choice */ 93 PetscCall(PetscInfo(NULL, " Allocating %" PetscInt_FMT " nonzeros: ", n_alloc)); 94 if (n_alloc_max == n_alloc_est) { 95 PetscCall(PetscInfo(NULL, "this is the correct size\n")); 96 } else if (n_alloc_now >= n_alloc_est) { 97 PetscCall(PetscInfo(NULL, "the current size, which seems enough\n")); 98 } else if (n_alloc_max < n_alloc_est * (1 + xtra_perc / 100.0)) { 99 PetscCall(PetscInfo(NULL, "the maximum estimate\n")); 100 } else { 101 PetscCall(PetscInfo(NULL, "%6.2f %% more than the estimate\n", (double)xtra_perc)); 102 } 103 104 /********************************************************** 105 2. Rescue arrays which would be lost 106 Count how many rows and nonzeros will have to be rescued 107 when moving all arrays in place */ 108 n_rescue = 0; 109 if (*n_row_alloc_ok == 0) *n_alloc_used = 0; 110 else { 111 i = *n_row_alloc_ok - 1; 112 113 *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i]; 114 } 115 116 for (i = *n_row_alloc_ok; i < nrows; i++) { 117 i_here = result->icols[i] - result->alloc_icol; 118 i_last = i_here + result->row_nnz[i]; 119 if (result->row_nnz[i] > 0) { 120 if (*n_alloc_used > i_here || i_last > n_alloc) { n_rescue += result->row_nnz[i]; } 121 122 if (i < i_row) *n_alloc_used += result->row_nnz[i]; 123 else *n_alloc_used += max_row_nnz[i]; 124 } 125 } 126 127 /* Allocate rescue arrays */ 128 PetscCall(PetscMalloc1(n_rescue, &icol_rescue)); 129 PetscCall(PetscMalloc1(n_rescue, &val_rescue)); 130 131 /* Rescue the arrays which need rescuing */ 132 n_rescue = 0; 133 if (*n_row_alloc_ok == 0) *n_alloc_used = 0; 134 else { 135 i = *n_row_alloc_ok - 1; 136 *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i]; 137 } 138 139 for (i = *n_row_alloc_ok; i < nrows; i++) { 140 i_here = result->icols[i] - result->alloc_icol; 141 i_last = i_here + result->row_nnz[i]; 142 if (result->row_nnz[i] > 0) { 143 if (*n_alloc_used > i_here || i_last > n_alloc) { 144 PetscCall(PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i])); 145 PetscCall(PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i])); 146 n_rescue += result->row_nnz[i]; 147 } 148 149 if (i < i_row) *n_alloc_used += result->row_nnz[i]; 150 else *n_alloc_used += max_row_nnz[i]; 151 } 152 } 153 154 /********************************************************** 155 3. Reallocate and correct administration */ 156 157 if (n_alloc != result->n_alloc_icol) { 158 n_copy = PetscMin(n_alloc, result->n_alloc_icol); 159 160 /* PETSC knows no REALLOC, so we'll REALLOC ourselves. 161 162 Allocate new icol-data, copy old contents */ 163 PetscCall(PetscMalloc1(n_alloc, &result->alloc_icol)); 164 PetscCall(PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy)); 165 166 /* Update administration, Reset pointers to new arrays */ 167 result->n_alloc_icol = n_alloc; 168 for (i = 0; i < nrows; i++) { 169 result->icols[i] = result->alloc_icol + (result->icols[i] - alloc_icol_old); 170 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 171 } 172 173 /* Delete old array */ 174 PetscCall(PetscFree(alloc_icol_old)); 175 176 /* Allocate new value-data, copy old contents */ 177 PetscCall(PetscMalloc1(n_alloc, &result->alloc_val)); 178 PetscCall(PetscArraycpy(result->alloc_val, alloc_val_old, n_copy)); 179 180 /* Update administration, Reset pointers to new arrays */ 181 result->n_alloc_val = n_alloc; 182 for (i = 0; i < nrows; i++) { result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); } 183 184 /* Delete old array */ 185 PetscCall(PetscFree(alloc_val_old)); 186 } 187 188 /********************************************************* 189 4. Copy all the arrays to their proper places */ 190 n_rescue = 0; 191 if (*n_row_alloc_ok == 0) *n_alloc_used = 0; 192 else { 193 i = *n_row_alloc_ok - 1; 194 195 *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i]; 196 } 197 198 for (i = *n_row_alloc_ok; i < nrows; i++) { 199 i_here = result->icols[i] - result->alloc_icol; 200 i_last = i_here + result->row_nnz[i]; 201 202 result->icols[i] = result->alloc_icol + *n_alloc_used; 203 result->values[i] = result->alloc_val + *n_alloc_used; 204 205 if (result->row_nnz[i] > 0) { 206 if (*n_alloc_used > i_here || i_last > n_alloc) { 207 PetscCall(PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i])); 208 PetscCall(PetscArraycpy(result->values[i], &val_rescue[n_rescue], result->row_nnz[i])); 209 210 n_rescue += result->row_nnz[i]; 211 } else { 212 for (j = 0; j < result->row_nnz[i]; j++) { 213 result->icols[i][j] = result->alloc_icol[i_here + j]; 214 result->values[i][j] = result->alloc_val[i_here + j]; 215 } 216 } 217 if (i < i_row) *n_alloc_used += result->row_nnz[i]; 218 else *n_alloc_used += max_row_nnz[i]; 219 } 220 } 221 222 /* Delete the rescue arrays */ 223 PetscCall(PetscFree(icol_rescue)); 224 PetscCall(PetscFree(val_rescue)); 225 226 *n_row_alloc_ok = i_row; 227 PetscFunctionReturn(0); 228 } 229 230 /* 231 spbas_incomplete_cholesky: 232 incomplete Cholesky decomposition of a square, symmetric, 233 positive definite matrix. 234 235 In case negative diagonals are encountered, function returns 236 NEGATIVE_DIAGONAL. When this happens, call this function again 237 with a larger epsdiag_in, a less sparse pattern, and/or a smaller 238 droptol 239 */ 240 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix *matrix_L, PetscBool *success) { 241 PetscInt jL; 242 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 243 PetscInt *ai = a->i, *aj = a->j; 244 MatScalar *aa = a->a; 245 PetscInt nrows, ncols; 246 PetscInt *max_row_nnz; 247 spbas_matrix retval; 248 PetscScalar *diag; 249 PetscScalar *val; 250 PetscScalar *lvec; 251 PetscScalar epsdiag; 252 PetscInt i, j, k; 253 const PetscBool do_values = PETSC_TRUE; 254 PetscInt *r1_icol; 255 PetscScalar *r1_val; 256 PetscInt *r_icol; 257 PetscInt r_nnz; 258 PetscScalar *r_val; 259 PetscInt *A_icol; 260 PetscInt A_nnz; 261 PetscScalar *A_val; 262 PetscInt *p_icol; 263 PetscInt p_nnz; 264 PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored correctly in the matrix */ 265 PetscInt n_alloc_used = 0; /* part of result->icols and result->values which is currently being used */ 266 267 PetscFunctionBegin; 268 /* Convert the Manteuffel shift from 'fraction of average diagonal' to dimensioned value */ 269 PetscCall(MatGetSize(A, &nrows, &ncols)); 270 PetscCall(MatGetTrace(A, &epsdiag)); 271 272 epsdiag *= epsdiag_in / nrows; 273 274 PetscCall(PetscInfo(NULL, " Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag), (double)droptol)); 275 276 if (droptol < 1e-10) droptol = 1e-10; 277 278 retval.nrows = nrows; 279 retval.ncols = nrows; 280 retval.nnz = pattern.nnz / 10; 281 retval.col_idx_type = SPBAS_COLUMN_NUMBERS; 282 retval.block_data = PETSC_TRUE; 283 284 PetscCall(spbas_allocate_pattern(&retval, do_values)); 285 PetscCall(PetscArrayzero(retval.row_nnz, nrows)); 286 PetscCall(spbas_allocate_data(&retval)); 287 retval.nnz = 0; 288 289 PetscCall(PetscMalloc1(nrows, &diag)); 290 PetscCall(PetscCalloc1(nrows, &val)); 291 PetscCall(PetscCalloc1(nrows, &lvec)); 292 PetscCall(PetscCalloc1(nrows, &max_row_nnz)); 293 294 /* Count the nonzeros on transpose of pattern */ 295 for (i = 0; i < nrows; i++) { 296 p_nnz = pattern.row_nnz[i]; 297 p_icol = pattern.icols[i]; 298 for (j = 0; j < p_nnz; j++) { max_row_nnz[i + p_icol[j]]++; } 299 } 300 301 /* Calculate rows of L */ 302 for (i = 0; i < nrows; i++) { 303 p_nnz = pattern.row_nnz[i]; 304 p_icol = pattern.icols[i]; 305 306 r_nnz = retval.row_nnz[i]; 307 r_icol = retval.icols[i]; 308 r_val = retval.values[i]; 309 A_nnz = ai[rip[i] + 1] - ai[rip[i]]; 310 A_icol = &aj[ai[rip[i]]]; 311 A_val = &aa[ai[rip[i]]]; 312 313 /* Calculate val += A(i,i:n)'; */ 314 for (j = 0; j < A_nnz; j++) { 315 k = riip[A_icol[j]]; 316 if (k >= i) val[k] = A_val[j]; 317 } 318 319 /* Add regularization */ 320 val[i] += epsdiag; 321 322 /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i); 323 val(i) = A(i,i) - L(0:i-1,i)' * lvec */ 324 for (j = 0; j < r_nnz; j++) { 325 k = r_icol[j]; 326 lvec[k] = diag[k] * r_val[j]; 327 val[i] -= r_val[j] * lvec[k]; 328 } 329 330 /* Calculate the new diagonal */ 331 diag[i] = val[i]; 332 if (PetscRealPart(diag[i]) < droptol) { 333 PetscCall(PetscInfo(NULL, "Error in spbas_incomplete_cholesky:\n")); 334 PetscCall(PetscInfo(NULL, "Negative diagonal in row %" PetscInt_FMT "\n", i + 1)); 335 336 /* Delete the whole matrix at once. */ 337 PetscCall(spbas_delete(retval)); 338 *success = PETSC_FALSE; 339 PetscFunctionReturn(0); 340 } 341 342 /* If necessary, allocate arrays */ 343 if (r_nnz == 0) { 344 PetscBool success = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used); 345 PetscCheck(success, PETSC_COMM_SELF, PETSC_ERR_MEM, "spbas_cholesky_row_alloc() failed"); 346 r_icol = retval.icols[i]; 347 r_val = retval.values[i]; 348 } 349 350 /* Now, fill in */ 351 r_icol[r_nnz] = i; 352 r_val[r_nnz] = 1.0; 353 r_nnz++; 354 retval.row_nnz[i]++; 355 356 retval.nnz += r_nnz; 357 358 /* Calculate 359 val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */ 360 for (j = 1; j < p_nnz; j++) { 361 k = i + p_icol[j]; 362 r1_icol = retval.icols[k]; 363 r1_val = retval.values[k]; 364 for (jL = 0; jL < retval.row_nnz[k]; jL++) { val[k] -= r1_val[jL] * lvec[r1_icol[jL]]; } 365 val[k] /= diag[i]; 366 367 if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k]) < -droptol) { 368 /* If necessary, allocate arrays */ 369 if (!retval.row_nnz[k]) { 370 PetscBool flag, success = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used); 371 if (!success) { 372 PetscCall(spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz)); 373 flag = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used); 374 PetscCheck(flag, PETSC_COMM_SELF, PETSC_ERR_MEM, "Allocation in spbas_cholesky_row_alloc() failed"); 375 r_icol = retval.icols[i]; 376 } 377 } 378 379 retval.icols[k][retval.row_nnz[k]] = i; 380 retval.values[k][retval.row_nnz[k]] = val[k]; 381 retval.row_nnz[k]++; 382 } 383 val[k] = 0; 384 } 385 386 /* Erase the values used in the work arrays */ 387 for (j = 0; j < r_nnz; j++) lvec[r_icol[j]] = 0; 388 } 389 390 PetscCall(PetscFree(lvec)); 391 PetscCall(PetscFree(val)); 392 393 PetscCall(spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz)); 394 PetscCall(PetscFree(max_row_nnz)); 395 396 /* Place the inverse of the diagonals in the matrix */ 397 for (i = 0; i < nrows; i++) { 398 r_nnz = retval.row_nnz[i]; 399 400 retval.values[i][r_nnz - 1] = 1.0 / diag[i]; 401 for (j = 0; j < r_nnz - 1; j++) { retval.values[i][j] *= -1; } 402 } 403 PetscCall(PetscFree(diag)); 404 *matrix_L = retval; 405 *success = PETSC_TRUE; 406 PetscFunctionReturn(0); 407 } 408