1 2 /* 3 spbas_cholesky_row_alloc: 4 in the data arrays, find a place where another row may be stored. 5 Return PETSC_ERR_MEM if there is insufficient space to store the 6 row, so garbage collection and/or re-allocation may be done. 7 */ 8 PetscBool spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz, PetscInt *n_alloc_used) { 9 retval.icols[k] = &retval.alloc_icol[*n_alloc_used]; 10 retval.values[k] = &retval.alloc_val[*n_alloc_used]; 11 *n_alloc_used += r_nnz; 12 return (*n_alloc_used > retval.n_alloc_icol) ? PETSC_FALSE : PETSC_TRUE; 13 } 14 15 /* 16 spbas_cholesky_garbage_collect: 17 move the rows which have been calculated so far, as well as 18 those currently under construction, to the front of the 19 array, while putting them in the proper order. 20 When it seems necessary, enlarge the current arrays. 21 22 NB: re-allocation is being simulated using 23 PetscMalloc, memcpy, PetscFree, because 24 PetscRealloc does not seem to exist. 25 26 */ 27 PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed. 28 Only the storage, not the contents of this matrix is changed in this function */ 29 PetscInt i_row, /* I : Number of rows for which the final contents are known */ 30 PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final 31 places in the arrays: they need not be moved any more */ 32 PetscInt *n_alloc_used, /* I/O: */ 33 PetscInt *max_row_nnz) /* I : Over-estimate of the number of nonzeros needed to store each row */ 34 { 35 /* PSEUDO-CODE: 36 1. Choose the appropriate size for the arrays 37 2. Rescue the arrays which would be lost during garbage collection 38 3. Reallocate and correct administration 39 4. Move all arrays so that they are in proper order */ 40 41 PetscInt i, j; 42 PetscInt nrows = result->nrows; 43 PetscInt n_alloc_ok = 0; 44 PetscInt n_alloc_ok_max = 0; 45 PetscInt need_already = 0; 46 PetscInt max_need_extra = 0; 47 PetscInt n_alloc_max, n_alloc_est, n_alloc; 48 PetscInt n_alloc_now = result->n_alloc_icol; 49 PetscInt *alloc_icol_old = result->alloc_icol; 50 PetscScalar *alloc_val_old = result->alloc_val; 51 PetscInt *icol_rescue; 52 PetscScalar *val_rescue; 53 PetscInt n_rescue; 54 PetscInt i_here, i_last, n_copy; 55 const PetscReal xtra_perc = 20; 56 57 PetscFunctionBegin; 58 /********************************************************* 59 1. Choose appropriate array size 60 Count number of rows and memory usage which is already final */ 61 for (i = 0; i < i_row; i++) { 62 n_alloc_ok += result->row_nnz[i]; 63 n_alloc_ok_max += max_row_nnz[i]; 64 } 65 66 /* Count currently needed memory usage and future memory requirements 67 (max, predicted)*/ 68 for (i = i_row; i < nrows; i++) { 69 if (!result->row_nnz[i]) { 70 max_need_extra += max_row_nnz[i]; 71 } else { 72 need_already += max_row_nnz[i]; 73 } 74 } 75 76 /* Make maximal and realistic memory requirement estimates */ 77 n_alloc_max = n_alloc_ok + need_already + max_need_extra; 78 n_alloc_est = n_alloc_ok + need_already + (int)(((PetscReal)max_need_extra) * ((PetscReal)n_alloc_ok) / ((PetscReal)n_alloc_ok_max)); 79 80 /* Choose array sizes */ 81 if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max; 82 else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now; 83 else if (n_alloc_max < n_alloc_est * (1 + xtra_perc / 100.0)) n_alloc = n_alloc_max; 84 else n_alloc = (int)(n_alloc_est * (1 + xtra_perc / 100.0)); 85 86 /* If new estimate is less than what we already have, 87 don't reallocate, just garbage-collect */ 88 if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) n_alloc = result->n_alloc_icol; 89 90 /* Motivate dimension choice */ 91 PetscCall(PetscInfo(NULL, " Allocating %" PetscInt_FMT " nonzeros: ", n_alloc)); 92 if (n_alloc_max == n_alloc_est) { 93 PetscCall(PetscInfo(NULL, "this is the correct size\n")); 94 } else if (n_alloc_now >= n_alloc_est) { 95 PetscCall(PetscInfo(NULL, "the current size, which seems enough\n")); 96 } else if (n_alloc_max < n_alloc_est * (1 + xtra_perc / 100.0)) { 97 PetscCall(PetscInfo(NULL, "the maximum estimate\n")); 98 } else { 99 PetscCall(PetscInfo(NULL, "%6.2f %% more than the estimate\n", (double)xtra_perc)); 100 } 101 102 /********************************************************** 103 2. Rescue arrays which would be lost 104 Count how many rows and nonzeros will have to be rescued 105 when moving all arrays in place */ 106 n_rescue = 0; 107 if (*n_row_alloc_ok == 0) *n_alloc_used = 0; 108 else { 109 i = *n_row_alloc_ok - 1; 110 111 *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i]; 112 } 113 114 for (i = *n_row_alloc_ok; i < nrows; i++) { 115 i_here = result->icols[i] - result->alloc_icol; 116 i_last = i_here + result->row_nnz[i]; 117 if (result->row_nnz[i] > 0) { 118 if (*n_alloc_used > i_here || i_last > n_alloc) n_rescue += result->row_nnz[i]; 119 120 if (i < i_row) *n_alloc_used += result->row_nnz[i]; 121 else *n_alloc_used += max_row_nnz[i]; 122 } 123 } 124 125 /* Allocate rescue arrays */ 126 PetscCall(PetscMalloc1(n_rescue, &icol_rescue)); 127 PetscCall(PetscMalloc1(n_rescue, &val_rescue)); 128 129 /* Rescue the arrays which need rescuing */ 130 n_rescue = 0; 131 if (*n_row_alloc_ok == 0) *n_alloc_used = 0; 132 else { 133 i = *n_row_alloc_ok - 1; 134 *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i]; 135 } 136 137 for (i = *n_row_alloc_ok; i < nrows; i++) { 138 i_here = result->icols[i] - result->alloc_icol; 139 i_last = i_here + result->row_nnz[i]; 140 if (result->row_nnz[i] > 0) { 141 if (*n_alloc_used > i_here || i_last > n_alloc) { 142 PetscCall(PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i])); 143 PetscCall(PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i])); 144 n_rescue += result->row_nnz[i]; 145 } 146 147 if (i < i_row) *n_alloc_used += result->row_nnz[i]; 148 else *n_alloc_used += max_row_nnz[i]; 149 } 150 } 151 152 /********************************************************** 153 3. Reallocate and correct administration */ 154 155 if (n_alloc != result->n_alloc_icol) { 156 n_copy = PetscMin(n_alloc, result->n_alloc_icol); 157 158 /* PETSC knows no REALLOC, so we'll REALLOC ourselves. 159 160 Allocate new icol-data, copy old contents */ 161 PetscCall(PetscMalloc1(n_alloc, &result->alloc_icol)); 162 PetscCall(PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy)); 163 164 /* Update administration, Reset pointers to new arrays */ 165 result->n_alloc_icol = n_alloc; 166 for (i = 0; i < nrows; i++) { 167 result->icols[i] = result->alloc_icol + (result->icols[i] - alloc_icol_old); 168 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 169 } 170 171 /* Delete old array */ 172 PetscCall(PetscFree(alloc_icol_old)); 173 174 /* Allocate new value-data, copy old contents */ 175 PetscCall(PetscMalloc1(n_alloc, &result->alloc_val)); 176 PetscCall(PetscArraycpy(result->alloc_val, alloc_val_old, n_copy)); 177 178 /* Update administration, Reset pointers to new arrays */ 179 result->n_alloc_val = n_alloc; 180 for (i = 0; i < nrows; i++) result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 181 182 /* Delete old array */ 183 PetscCall(PetscFree(alloc_val_old)); 184 } 185 186 /********************************************************* 187 4. Copy all the arrays to their proper places */ 188 n_rescue = 0; 189 if (*n_row_alloc_ok == 0) *n_alloc_used = 0; 190 else { 191 i = *n_row_alloc_ok - 1; 192 193 *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i]; 194 } 195 196 for (i = *n_row_alloc_ok; i < nrows; i++) { 197 i_here = result->icols[i] - result->alloc_icol; 198 i_last = i_here + result->row_nnz[i]; 199 200 result->icols[i] = result->alloc_icol + *n_alloc_used; 201 result->values[i] = result->alloc_val + *n_alloc_used; 202 203 if (result->row_nnz[i] > 0) { 204 if (*n_alloc_used > i_here || i_last > n_alloc) { 205 PetscCall(PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i])); 206 PetscCall(PetscArraycpy(result->values[i], &val_rescue[n_rescue], result->row_nnz[i])); 207 208 n_rescue += result->row_nnz[i]; 209 } else { 210 for (j = 0; j < result->row_nnz[i]; j++) { 211 result->icols[i][j] = result->alloc_icol[i_here + j]; 212 result->values[i][j] = result->alloc_val[i_here + j]; 213 } 214 } 215 if (i < i_row) *n_alloc_used += result->row_nnz[i]; 216 else *n_alloc_used += max_row_nnz[i]; 217 } 218 } 219 220 /* Delete the rescue arrays */ 221 PetscCall(PetscFree(icol_rescue)); 222 PetscCall(PetscFree(val_rescue)); 223 224 *n_row_alloc_ok = i_row; 225 PetscFunctionReturn(0); 226 } 227 228 /* 229 spbas_incomplete_cholesky: 230 incomplete Cholesky decomposition of a square, symmetric, 231 positive definite matrix. 232 233 In case negative diagonals are encountered, function returns 234 NEGATIVE_DIAGONAL. When this happens, call this function again 235 with a larger epsdiag_in, a less sparse pattern, and/or a smaller 236 droptol 237 */ 238 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix *matrix_L, PetscBool *success) { 239 PetscInt jL; 240 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 241 PetscInt *ai = a->i, *aj = a->j; 242 MatScalar *aa = a->a; 243 PetscInt nrows, ncols; 244 PetscInt *max_row_nnz; 245 spbas_matrix retval; 246 PetscScalar *diag; 247 PetscScalar *val; 248 PetscScalar *lvec; 249 PetscScalar epsdiag; 250 PetscInt i, j, k; 251 const PetscBool do_values = PETSC_TRUE; 252 PetscInt *r1_icol; 253 PetscScalar *r1_val; 254 PetscInt *r_icol; 255 PetscInt r_nnz; 256 PetscScalar *r_val; 257 PetscInt *A_icol; 258 PetscInt A_nnz; 259 PetscScalar *A_val; 260 PetscInt *p_icol; 261 PetscInt p_nnz; 262 PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored correctly in the matrix */ 263 PetscInt n_alloc_used = 0; /* part of result->icols and result->values which is currently being used */ 264 265 PetscFunctionBegin; 266 /* Convert the Manteuffel shift from 'fraction of average diagonal' to dimensioned value */ 267 PetscCall(MatGetSize(A, &nrows, &ncols)); 268 PetscCall(MatGetTrace(A, &epsdiag)); 269 270 epsdiag *= epsdiag_in / nrows; 271 272 PetscCall(PetscInfo(NULL, " Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag), (double)droptol)); 273 274 if (droptol < 1e-10) droptol = 1e-10; 275 276 retval.nrows = nrows; 277 retval.ncols = nrows; 278 retval.nnz = pattern.nnz / 10; 279 retval.col_idx_type = SPBAS_COLUMN_NUMBERS; 280 retval.block_data = PETSC_TRUE; 281 282 PetscCall(spbas_allocate_pattern(&retval, do_values)); 283 PetscCall(PetscArrayzero(retval.row_nnz, nrows)); 284 PetscCall(spbas_allocate_data(&retval)); 285 retval.nnz = 0; 286 287 PetscCall(PetscMalloc1(nrows, &diag)); 288 PetscCall(PetscCalloc1(nrows, &val)); 289 PetscCall(PetscCalloc1(nrows, &lvec)); 290 PetscCall(PetscCalloc1(nrows, &max_row_nnz)); 291 292 /* Count the nonzeros on transpose of pattern */ 293 for (i = 0; i < nrows; i++) { 294 p_nnz = pattern.row_nnz[i]; 295 p_icol = pattern.icols[i]; 296 for (j = 0; j < p_nnz; j++) max_row_nnz[i + p_icol[j]]++; 297 } 298 299 /* Calculate rows of L */ 300 for (i = 0; i < nrows; i++) { 301 p_nnz = pattern.row_nnz[i]; 302 p_icol = pattern.icols[i]; 303 304 r_nnz = retval.row_nnz[i]; 305 r_icol = retval.icols[i]; 306 r_val = retval.values[i]; 307 A_nnz = ai[rip[i] + 1] - ai[rip[i]]; 308 A_icol = &aj[ai[rip[i]]]; 309 A_val = &aa[ai[rip[i]]]; 310 311 /* Calculate val += A(i,i:n)'; */ 312 for (j = 0; j < A_nnz; j++) { 313 k = riip[A_icol[j]]; 314 if (k >= i) val[k] = A_val[j]; 315 } 316 317 /* Add regularization */ 318 val[i] += epsdiag; 319 320 /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i); 321 val(i) = A(i,i) - L(0:i-1,i)' * lvec */ 322 for (j = 0; j < r_nnz; j++) { 323 k = r_icol[j]; 324 lvec[k] = diag[k] * r_val[j]; 325 val[i] -= r_val[j] * lvec[k]; 326 } 327 328 /* Calculate the new diagonal */ 329 diag[i] = val[i]; 330 if (PetscRealPart(diag[i]) < droptol) { 331 PetscCall(PetscInfo(NULL, "Error in spbas_incomplete_cholesky:\n")); 332 PetscCall(PetscInfo(NULL, "Negative diagonal in row %" PetscInt_FMT "\n", i + 1)); 333 334 /* Delete the whole matrix at once. */ 335 PetscCall(spbas_delete(retval)); 336 *success = PETSC_FALSE; 337 PetscFunctionReturn(0); 338 } 339 340 /* If necessary, allocate arrays */ 341 if (r_nnz == 0) { 342 PetscBool success = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used); 343 PetscCheck(success, PETSC_COMM_SELF, PETSC_ERR_MEM, "spbas_cholesky_row_alloc() failed"); 344 r_icol = retval.icols[i]; 345 r_val = retval.values[i]; 346 } 347 348 /* Now, fill in */ 349 r_icol[r_nnz] = i; 350 r_val[r_nnz] = 1.0; 351 r_nnz++; 352 retval.row_nnz[i]++; 353 354 retval.nnz += r_nnz; 355 356 /* Calculate 357 val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */ 358 for (j = 1; j < p_nnz; j++) { 359 k = i + p_icol[j]; 360 r1_icol = retval.icols[k]; 361 r1_val = retval.values[k]; 362 for (jL = 0; jL < retval.row_nnz[k]; jL++) val[k] -= r1_val[jL] * lvec[r1_icol[jL]]; 363 val[k] /= diag[i]; 364 365 if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k]) < -droptol) { 366 /* If necessary, allocate arrays */ 367 if (!retval.row_nnz[k]) { 368 PetscBool flag, success = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used); 369 if (!success) { 370 PetscCall(spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz)); 371 flag = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used); 372 PetscCheck(flag, PETSC_COMM_SELF, PETSC_ERR_MEM, "Allocation in spbas_cholesky_row_alloc() failed"); 373 r_icol = retval.icols[i]; 374 } 375 } 376 377 retval.icols[k][retval.row_nnz[k]] = i; 378 retval.values[k][retval.row_nnz[k]] = val[k]; 379 retval.row_nnz[k]++; 380 } 381 val[k] = 0; 382 } 383 384 /* Erase the values used in the work arrays */ 385 for (j = 0; j < r_nnz; j++) lvec[r_icol[j]] = 0; 386 } 387 388 PetscCall(PetscFree(lvec)); 389 PetscCall(PetscFree(val)); 390 391 PetscCall(spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz)); 392 PetscCall(PetscFree(max_row_nnz)); 393 394 /* Place the inverse of the diagonals in the matrix */ 395 for (i = 0; i < nrows; i++) { 396 r_nnz = retval.row_nnz[i]; 397 398 retval.values[i][r_nnz - 1] = 1.0 / diag[i]; 399 for (j = 0; j < r_nnz - 1; j++) retval.values[i][j] *= -1; 400 } 401 PetscCall(PetscFree(diag)); 402 *matrix_L = retval; 403 *success = PETSC_TRUE; 404 PetscFunctionReturn(0); 405 } 406