1 2 /* 3 spbas_cholesky_row_alloc: 4 in the data arrays, find a place where another row may be stored. 5 Return PETSC_ERR_MEM if there is insufficient space to store the 6 row, so garbage collection and/or re-allocation may be done. 7 */ 8 PetscBool spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used) 9 { 10 PetscFunctionBegin; 11 retval.icols[k] = &retval.alloc_icol[*n_alloc_used]; 12 retval.values[k] = &retval.alloc_val[*n_alloc_used]; 13 *n_alloc_used += r_nnz; 14 if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_FALSE); 15 PetscFunctionReturn(PETSC_TRUE); 16 } 17 18 /* 19 spbas_cholesky_garbage_collect: 20 move the rows which have been calculated so far, as well as 21 those currently under construction, to the front of the 22 array, while putting them in the proper order. 23 When it seems necessary, enlarge the current arrays. 24 25 NB: re-allocation is being simulated using 26 PetscMalloc, memcpy, PetscFree, because 27 PetscRealloc does not seem to exist. 28 29 */ 30 PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed. 31 Only the storage, not the contents of this matrix is changed in this function */ 32 PetscInt i_row, /* I : Number of rows for which the final contents are known */ 33 PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final 34 places in the arrays: they need not be moved any more */ 35 PetscInt *n_alloc_used, /* I/O: */ 36 PetscInt *max_row_nnz) /* I : Over-estimate of the number of nonzeros needed to store each row */ 37 { 38 /* PSEUDO-CODE: 39 1. Choose the appropriate size for the arrays 40 2. Rescue the arrays which would be lost during garbage collection 41 3. Reallocate and correct administration 42 4. Move all arrays so that they are in proper order */ 43 44 PetscInt i,j; 45 PetscInt nrows = result->nrows; 46 PetscInt n_alloc_ok =0; 47 PetscInt n_alloc_ok_max=0; 48 PetscInt need_already = 0; 49 PetscInt max_need_extra= 0; 50 PetscInt n_alloc_max, n_alloc_est, n_alloc; 51 PetscInt n_alloc_now = result->n_alloc_icol; 52 PetscInt *alloc_icol_old = result->alloc_icol; 53 PetscScalar *alloc_val_old = result->alloc_val; 54 PetscInt *icol_rescue; 55 PetscScalar *val_rescue; 56 PetscInt n_rescue; 57 PetscInt i_here, i_last, n_copy; 58 const PetscReal xtra_perc = 20; 59 60 PetscFunctionBegin; 61 /********************************************************* 62 1. Choose appropriate array size 63 Count number of rows and memory usage which is already final */ 64 for (i=0; i<i_row; i++) { 65 n_alloc_ok += result->row_nnz[i]; 66 n_alloc_ok_max += max_row_nnz[i]; 67 } 68 69 /* Count currently needed memory usage and future memory requirements 70 (max, predicted)*/ 71 for (i=i_row; i<nrows; i++) { 72 if (!result->row_nnz[i]) { 73 max_need_extra += max_row_nnz[i]; 74 } else { 75 need_already += max_row_nnz[i]; 76 } 77 } 78 79 /* Make maximal and realistic memory requirement estimates */ 80 n_alloc_max = n_alloc_ok + need_already + max_need_extra; 81 n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) * ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max)); 82 83 /* Choose array sizes */ 84 if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max; 85 else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now; 86 else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) n_alloc = n_alloc_max; 87 else n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); 88 89 /* If new estimate is less than what we already have, 90 don't reallocate, just garbage-collect */ 91 if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) { 92 n_alloc = result->n_alloc_icol; 93 } 94 95 /* Motivate dimension choice */ 96 PetscCall(PetscInfo(NULL," Allocating %" PetscInt_FMT " nonzeros: ",n_alloc)); 97 if (n_alloc_max == n_alloc_est) { 98 PetscCall(PetscInfo(NULL,"this is the correct size\n")); 99 } else if (n_alloc_now >= n_alloc_est) { 100 PetscCall(PetscInfo(NULL,"the current size, which seems enough\n")); 101 } else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) { 102 PetscCall(PetscInfo(NULL,"the maximum estimate\n")); 103 } else { 104 PetscCall(PetscInfo(NULL,"%6.2f %% more than the estimate\n",(double)xtra_perc)); 105 } 106 107 /********************************************************** 108 2. Rescue arrays which would be lost 109 Count how many rows and nonzeros will have to be rescued 110 when moving all arrays in place */ 111 n_rescue = 0; 112 if (*n_row_alloc_ok==0) *n_alloc_used = 0; 113 else { 114 i = *n_row_alloc_ok - 1; 115 116 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 117 } 118 119 for (i=*n_row_alloc_ok; i<nrows; i++) { 120 i_here = result->icols[i]-result->alloc_icol; 121 i_last = i_here + result->row_nnz[i]; 122 if (result->row_nnz[i]>0) { 123 if (*n_alloc_used > i_here || i_last > n_alloc) { 124 n_rescue += result->row_nnz[i]; 125 } 126 127 if (i<i_row) *n_alloc_used += result->row_nnz[i]; 128 else *n_alloc_used += max_row_nnz[i]; 129 } 130 } 131 132 /* Allocate rescue arrays */ 133 PetscCall(PetscMalloc1(n_rescue, &icol_rescue)); 134 PetscCall(PetscMalloc1(n_rescue, &val_rescue)); 135 136 /* Rescue the arrays which need rescuing */ 137 n_rescue = 0; 138 if (*n_row_alloc_ok==0) *n_alloc_used = 0; 139 else { 140 i = *n_row_alloc_ok - 1; 141 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 142 } 143 144 for (i=*n_row_alloc_ok; i<nrows; i++) { 145 i_here = result->icols[i]-result->alloc_icol; 146 i_last = i_here + result->row_nnz[i]; 147 if (result->row_nnz[i]>0) { 148 if (*n_alloc_used > i_here || i_last > n_alloc) { 149 PetscCall(PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i])); 150 PetscCall(PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i])); 151 n_rescue += result->row_nnz[i]; 152 } 153 154 if (i<i_row) *n_alloc_used += result->row_nnz[i]; 155 else *n_alloc_used += max_row_nnz[i]; 156 } 157 } 158 159 /********************************************************** 160 3. Reallocate and correct administration */ 161 162 if (n_alloc != result->n_alloc_icol) { 163 n_copy = PetscMin(n_alloc,result->n_alloc_icol); 164 165 /* PETSC knows no REALLOC, so we'll REALLOC ourselves. 166 167 Allocate new icol-data, copy old contents */ 168 PetscCall(PetscMalloc1(n_alloc, &result->alloc_icol)); 169 PetscCall(PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy)); 170 171 /* Update administration, Reset pointers to new arrays */ 172 result->n_alloc_icol = n_alloc; 173 for (i=0; i<nrows; i++) { 174 result->icols[i] = result->alloc_icol + (result->icols[i] - alloc_icol_old); 175 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 176 } 177 178 /* Delete old array */ 179 PetscCall(PetscFree(alloc_icol_old)); 180 181 /* Allocate new value-data, copy old contents */ 182 PetscCall(PetscMalloc1(n_alloc, &result->alloc_val)); 183 PetscCall(PetscArraycpy(result->alloc_val, alloc_val_old, n_copy)); 184 185 /* Update administration, Reset pointers to new arrays */ 186 result->n_alloc_val = n_alloc; 187 for (i=0; i<nrows; i++) { 188 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 189 } 190 191 /* Delete old array */ 192 PetscCall(PetscFree(alloc_val_old)); 193 } 194 195 /********************************************************* 196 4. Copy all the arrays to their proper places */ 197 n_rescue = 0; 198 if (*n_row_alloc_ok==0) *n_alloc_used = 0; 199 else { 200 i = *n_row_alloc_ok - 1; 201 202 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 203 } 204 205 for (i=*n_row_alloc_ok; i<nrows; i++) { 206 i_here = result->icols[i]-result->alloc_icol; 207 i_last = i_here + result->row_nnz[i]; 208 209 result->icols[i] = result->alloc_icol + *n_alloc_used; 210 result->values[i]= result->alloc_val + *n_alloc_used; 211 212 if (result->row_nnz[i]>0) { 213 if (*n_alloc_used > i_here || i_last > n_alloc) { 214 PetscCall(PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i])); 215 PetscCall(PetscArraycpy(result->values[i],&val_rescue[n_rescue],result->row_nnz[i])); 216 217 n_rescue += result->row_nnz[i]; 218 } else { 219 for (j=0; j<result->row_nnz[i]; j++) { 220 result->icols[i][j] = result->alloc_icol[i_here+j]; 221 result->values[i][j] = result->alloc_val[i_here+j]; 222 } 223 } 224 if (i<i_row) *n_alloc_used += result->row_nnz[i]; 225 else *n_alloc_used += max_row_nnz[i]; 226 } 227 } 228 229 /* Delete the rescue arrays */ 230 PetscCall(PetscFree(icol_rescue)); 231 PetscCall(PetscFree(val_rescue)); 232 233 *n_row_alloc_ok = i_row; 234 PetscFunctionReturn(0); 235 } 236 237 /* 238 spbas_incomplete_cholesky: 239 incomplete Cholesky decomposition of a square, symmetric, 240 positive definite matrix. 241 242 In case negative diagonals are encountered, function returns 243 NEGATIVE_DIAGONAL. When this happens, call this function again 244 with a larger epsdiag_in, a less sparse pattern, and/or a smaller 245 droptol 246 */ 247 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L,PetscBool *success) 248 { 249 PetscInt jL; 250 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data; 251 PetscInt *ai=a->i,*aj=a->j; 252 MatScalar *aa=a->a; 253 PetscInt nrows, ncols; 254 PetscInt *max_row_nnz; 255 spbas_matrix retval; 256 PetscScalar *diag; 257 PetscScalar *val; 258 PetscScalar *lvec; 259 PetscScalar epsdiag; 260 PetscInt i,j,k; 261 const PetscBool do_values = PETSC_TRUE; 262 PetscInt *r1_icol; 263 PetscScalar *r1_val; 264 PetscInt *r_icol; 265 PetscInt r_nnz; 266 PetscScalar *r_val; 267 PetscInt *A_icol; 268 PetscInt A_nnz; 269 PetscScalar *A_val; 270 PetscInt *p_icol; 271 PetscInt p_nnz; 272 PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored correctly in the matrix */ 273 PetscInt n_alloc_used = 0; /* part of result->icols and result->values which is currently being used */ 274 275 PetscFunctionBegin; 276 /* Convert the Manteuffel shift from 'fraction of average diagonal' to dimensioned value */ 277 PetscCall(MatGetSize(A, &nrows, &ncols)); 278 PetscCall(MatGetTrace(A, &epsdiag)); 279 280 epsdiag *= epsdiag_in / nrows; 281 282 PetscCall(PetscInfo(NULL," Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag),(double)droptol)); 283 284 if (droptol<1e-10) droptol=1e-10; 285 286 retval.nrows = nrows; 287 retval.ncols = nrows; 288 retval.nnz = pattern.nnz/10; 289 retval.col_idx_type = SPBAS_COLUMN_NUMBERS; 290 retval.block_data = PETSC_TRUE; 291 292 PetscCall(spbas_allocate_pattern(&retval, do_values)); 293 PetscCall(PetscArrayzero(retval.row_nnz, nrows)); 294 PetscCall(spbas_allocate_data(&retval)); 295 retval.nnz = 0; 296 297 PetscCall(PetscMalloc1(nrows, &diag)); 298 PetscCall(PetscCalloc1(nrows, &val)); 299 PetscCall(PetscCalloc1(nrows, &lvec)); 300 PetscCall(PetscCalloc1(nrows, &max_row_nnz)); 301 302 /* Count the nonzeros on transpose of pattern */ 303 for (i = 0; i<nrows; i++) { 304 p_nnz = pattern.row_nnz[i]; 305 p_icol = pattern.icols[i]; 306 for (j=0; j<p_nnz; j++) { 307 max_row_nnz[i+p_icol[j]]++; 308 } 309 } 310 311 /* Calculate rows of L */ 312 for (i = 0; i<nrows; i++) { 313 p_nnz = pattern.row_nnz[i]; 314 p_icol = pattern.icols[i]; 315 316 r_nnz = retval.row_nnz[i]; 317 r_icol = retval.icols[i]; 318 r_val = retval.values[i]; 319 A_nnz = ai[rip[i]+1] - ai[rip[i]]; 320 A_icol = &aj[ai[rip[i]]]; 321 A_val = &aa[ai[rip[i]]]; 322 323 /* Calculate val += A(i,i:n)'; */ 324 for (j=0; j<A_nnz; j++) { 325 k = riip[A_icol[j]]; 326 if (k>=i) val[k] = A_val[j]; 327 } 328 329 /* Add regularization */ 330 val[i] += epsdiag; 331 332 /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i); 333 val(i) = A(i,i) - L(0:i-1,i)' * lvec */ 334 for (j=0; j<r_nnz; j++) { 335 k = r_icol[j]; 336 lvec[k] = diag[k] * r_val[j]; 337 val[i] -= r_val[j] * lvec[k]; 338 } 339 340 /* Calculate the new diagonal */ 341 diag[i] = val[i]; 342 if (PetscRealPart(diag[i])<droptol) { 343 PetscCall(PetscInfo(NULL,"Error in spbas_incomplete_cholesky:\n")); 344 PetscCall(PetscInfo(NULL,"Negative diagonal in row %" PetscInt_FMT "\n",i+1)); 345 346 /* Delete the whole matrix at once. */ 347 PetscCall(spbas_delete(retval)); 348 *success = PETSC_FALSE; 349 PetscFunctionReturn(0); 350 } 351 352 /* If necessary, allocate arrays */ 353 if (r_nnz==0) { 354 PetscBool success = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used); 355 PetscCheck(success,PETSC_COMM_SELF,PETSC_ERR_MEM,"spbas_cholesky_row_alloc() failed"); 356 r_icol = retval.icols[i]; 357 r_val = retval.values[i]; 358 } 359 360 /* Now, fill in */ 361 r_icol[r_nnz] = i; 362 r_val [r_nnz] = 1.0; 363 r_nnz++; 364 retval.row_nnz[i]++; 365 366 retval.nnz += r_nnz; 367 368 /* Calculate 369 val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */ 370 for (j=1; j<p_nnz; j++) { 371 k = i+p_icol[j]; 372 r1_icol = retval.icols[k]; 373 r1_val = retval.values[k]; 374 for (jL=0; jL<retval.row_nnz[k]; jL++) { 375 val[k] -= r1_val[jL] * lvec[r1_icol[jL]]; 376 } 377 val[k] /= diag[i]; 378 379 if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) { 380 /* If necessary, allocate arrays */ 381 if (!retval.row_nnz[k]) { 382 PetscBool flag,success = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used); 383 if (!success) { 384 PetscCall(spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz)); 385 flag = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used); 386 PetscCheck(flag,PETSC_COMM_SELF,PETSC_ERR_MEM,"Allocation in spbas_cholesky_row_alloc() failed"); 387 r_icol = retval.icols[i]; 388 } 389 } 390 391 retval.icols[k][retval.row_nnz[k]] = i; 392 retval.values[k][retval.row_nnz[k]] = val[k]; 393 retval.row_nnz[k]++; 394 } 395 val[k] = 0; 396 } 397 398 /* Erase the values used in the work arrays */ 399 for (j=0; j<r_nnz; j++) lvec[r_icol[j]] = 0; 400 } 401 402 PetscCall(PetscFree(lvec)); 403 PetscCall(PetscFree(val)); 404 405 PetscCall(spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz)); 406 PetscCall(PetscFree(max_row_nnz)); 407 408 /* Place the inverse of the diagonals in the matrix */ 409 for (i=0; i<nrows; i++) { 410 r_nnz = retval.row_nnz[i]; 411 412 retval.values[i][r_nnz-1] = 1.0 / diag[i]; 413 for (j=0; j<r_nnz-1; j++) { 414 retval.values[i][j] *= -1; 415 } 416 } 417 PetscCall(PetscFree(diag)); 418 *matrix_L = retval; 419 *success = PETSC_TRUE; 420 PetscFunctionReturn(0); 421 } 422