1 2 /* 3 spbas_cholesky_row_alloc: 4 in the data arrays, find a place where another row may be stored. 5 Return PETSC_ERR_MEM if there is insufficient space to store the 6 row, so garbage collection and/or re-allocation may be done. 7 */ 8 PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used) 9 { 10 PetscFunctionBegin; 11 retval.icols[k] = &retval.alloc_icol[*n_alloc_used]; 12 retval.values[k] = &retval.alloc_val[*n_alloc_used]; 13 *n_alloc_used += r_nnz; 14 if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM); 15 PetscFunctionReturn(0); 16 } 17 18 /* 19 spbas_cholesky_garbage_collect: 20 move the rows which have been calculated so far, as well as 21 those currently under construction, to the front of the 22 array, while putting them in the proper order. 23 When it seems necessary, enlarge the current arrays. 24 25 NB: re-allocation is being simulated using 26 PetscMalloc, memcpy, PetscFree, because 27 PetscRealloc does not seem to exist. 28 29 */ 30 PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed. 31 Only the storage, not the contents of this matrix is changed in this function */ 32 PetscInt i_row, /* I : Number of rows for which the final contents are known */ 33 PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final 34 places in the arrays: they need not be moved any more */ 35 PetscInt *n_alloc_used, /* I/O: */ 36 PetscInt *max_row_nnz) /* I : Over-estimate of the number of nonzeros needed to store each row */ 37 { 38 /* PSEUDO-CODE: 39 1. Choose the appropriate size for the arrays 40 2. Rescue the arrays which would be lost during garbage collection 41 3. Reallocate and correct administration 42 4. Move all arrays so that they are in proper order */ 43 44 PetscInt i,j; 45 PetscInt nrows = result->nrows; 46 PetscInt n_alloc_ok =0; 47 PetscInt n_alloc_ok_max=0; 48 PetscErrorCode ierr; 49 PetscInt need_already = 0; 50 PetscInt n_rows_ahead =0; 51 PetscInt max_need_extra= 0; 52 PetscInt n_alloc_max, n_alloc_est, n_alloc; 53 PetscInt n_alloc_now = result->n_alloc_icol; 54 PetscInt *alloc_icol_old = result->alloc_icol; 55 PetscScalar *alloc_val_old = result->alloc_val; 56 PetscInt *icol_rescue; 57 PetscScalar *val_rescue; 58 PetscInt n_rescue; 59 PetscInt n_row_rescue; 60 PetscInt i_here, i_last, n_copy; 61 const PetscReal xtra_perc = 20; 62 63 PetscFunctionBegin; 64 /********************************************************* 65 1. Choose appropriate array size 66 Count number of rows and memory usage which is already final */ 67 for (i=0; i<i_row; i++) { 68 n_alloc_ok += result->row_nnz[i]; 69 n_alloc_ok_max += max_row_nnz[i]; 70 } 71 72 /* Count currently needed memory usage and future memory requirements 73 (max, predicted)*/ 74 for (i=i_row; i<nrows; i++) { 75 if (!result->row_nnz[i]) { 76 max_need_extra += max_row_nnz[i]; 77 } else { 78 need_already += max_row_nnz[i]; 79 n_rows_ahead++; 80 } 81 } 82 83 /* Make maximal and realistic memory requirement estimates */ 84 n_alloc_max = n_alloc_ok + need_already + max_need_extra; 85 n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) * ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max)); 86 87 /* Choose array sizes */ 88 if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max; 89 else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now; 90 else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) n_alloc = n_alloc_max; 91 else n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); 92 93 /* If new estimate is less than what we already have, 94 don't reallocate, just garbage-collect */ 95 if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) { 96 n_alloc = result->n_alloc_icol; 97 } 98 99 /* Motivate dimension choice */ 100 ierr = PetscInfo1(NULL," Allocating %d nonzeros: ",n_alloc);CHKERRQ(ierr); 101 if (n_alloc_max == n_alloc_est) { 102 ierr = PetscInfo(NULL,"this is the correct size\n");CHKERRQ(ierr); 103 } else if (n_alloc_now >= n_alloc_est) { 104 ierr = PetscInfo(NULL,"the current size, which seems enough\n");CHKERRQ(ierr); 105 } else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) { 106 ierr = PetscInfo(NULL,"the maximum estimate\n");CHKERRQ(ierr); 107 } else { 108 ierr = PetscInfo1(NULL,"%6.2f %% more than the estimate\n",xtra_perc);CHKERRQ(ierr); 109 } 110 111 /********************************************************** 112 2. Rescue arrays which would be lost 113 Count how many rows and nonzeros will have to be rescued 114 when moving all arrays in place */ 115 n_row_rescue = 0; n_rescue = 0; 116 if (*n_row_alloc_ok==0) *n_alloc_used = 0; 117 else { 118 i = *n_row_alloc_ok - 1; 119 120 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 121 } 122 123 for (i=*n_row_alloc_ok; i<nrows; i++) { 124 i_here = result->icols[i]-result->alloc_icol; 125 i_last = i_here + result->row_nnz[i]; 126 if (result->row_nnz[i]>0) { 127 if (*n_alloc_used > i_here || i_last > n_alloc) { 128 n_rescue += result->row_nnz[i]; 129 n_row_rescue++; 130 } 131 132 if (i<i_row) *n_alloc_used += result->row_nnz[i]; 133 else *n_alloc_used += max_row_nnz[i]; 134 } 135 } 136 137 /* Allocate rescue arrays */ 138 ierr = PetscMalloc1(n_rescue, &icol_rescue);CHKERRQ(ierr); 139 ierr = PetscMalloc1(n_rescue, &val_rescue);CHKERRQ(ierr); 140 141 /* Rescue the arrays which need rescuing */ 142 n_row_rescue = 0; n_rescue = 0; 143 if (*n_row_alloc_ok==0) *n_alloc_used = 0; 144 else { 145 i = *n_row_alloc_ok - 1; 146 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 147 } 148 149 for (i=*n_row_alloc_ok; i<nrows; i++) { 150 i_here = result->icols[i]-result->alloc_icol; 151 i_last = i_here + result->row_nnz[i]; 152 if (result->row_nnz[i]>0) { 153 if (*n_alloc_used > i_here || i_last > n_alloc) { 154 ierr = PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i]);CHKERRQ(ierr); 155 ierr = PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i]);CHKERRQ(ierr); 156 n_rescue += result->row_nnz[i]; 157 n_row_rescue++; 158 } 159 160 if (i<i_row) *n_alloc_used += result->row_nnz[i]; 161 else *n_alloc_used += max_row_nnz[i]; 162 } 163 } 164 165 /********************************************************** 166 3. Reallocate and correct administration */ 167 168 if (n_alloc != result->n_alloc_icol) { 169 n_copy = PetscMin(n_alloc,result->n_alloc_icol); 170 171 /* PETSC knows no REALLOC, so we'll REALLOC ourselves. 172 173 Allocate new icol-data, copy old contents */ 174 ierr = PetscMalloc1(n_alloc, &result->alloc_icol);CHKERRQ(ierr); 175 ierr = PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy);CHKERRQ(ierr); 176 177 /* Update administration, Reset pointers to new arrays */ 178 result->n_alloc_icol = n_alloc; 179 for (i=0; i<nrows; i++) { 180 result->icols[i] = result->alloc_icol + (result->icols[i] - alloc_icol_old); 181 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 182 } 183 184 /* Delete old array */ 185 ierr = PetscFree(alloc_icol_old);CHKERRQ(ierr); 186 187 /* Allocate new value-data, copy old contents */ 188 ierr = PetscMalloc1(n_alloc, &result->alloc_val);CHKERRQ(ierr); 189 ierr = PetscArraycpy(result->alloc_val, alloc_val_old, n_copy);CHKERRQ(ierr); 190 191 /* Update administration, Reset pointers to new arrays */ 192 result->n_alloc_val = n_alloc; 193 for (i=0; i<nrows; i++) { 194 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 195 } 196 197 /* Delete old array */ 198 ierr = PetscFree(alloc_val_old);CHKERRQ(ierr); 199 } 200 201 /********************************************************* 202 4. Copy all the arrays to their proper places */ 203 n_row_rescue = 0; n_rescue = 0; 204 if (*n_row_alloc_ok==0) *n_alloc_used = 0; 205 else { 206 i = *n_row_alloc_ok - 1; 207 208 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 209 } 210 211 for (i=*n_row_alloc_ok; i<nrows; i++) { 212 i_here = result->icols[i]-result->alloc_icol; 213 i_last = i_here + result->row_nnz[i]; 214 215 result->icols[i] = result->alloc_icol + *n_alloc_used; 216 result->values[i]= result->alloc_val + *n_alloc_used; 217 218 if (result->row_nnz[i]>0) { 219 if (*n_alloc_used > i_here || i_last > n_alloc) { 220 ierr = PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i]);CHKERRQ(ierr); 221 ierr = PetscArraycpy(result->values[i],&val_rescue[n_rescue],result->row_nnz[i]);CHKERRQ(ierr); 222 223 n_rescue += result->row_nnz[i]; 224 n_row_rescue++; 225 } else { 226 for (j=0; j<result->row_nnz[i]; j++) { 227 result->icols[i][j] = result->alloc_icol[i_here+j]; 228 result->values[i][j] = result->alloc_val[i_here+j]; 229 } 230 } 231 if (i<i_row) *n_alloc_used += result->row_nnz[i]; 232 else *n_alloc_used += max_row_nnz[i]; 233 } 234 } 235 236 /* Delete the rescue arrays */ 237 ierr = PetscFree(icol_rescue);CHKERRQ(ierr); 238 ierr = PetscFree(val_rescue);CHKERRQ(ierr); 239 240 *n_row_alloc_ok = i_row; 241 PetscFunctionReturn(0); 242 } 243 244 /* 245 spbas_incomplete_cholesky: 246 incomplete Cholesky decomposition of a square, symmetric, 247 positive definite matrix. 248 249 In case negative diagonals are encountered, function returns 250 NEGATIVE_DIAGONAL. When this happens, call this function again 251 with a larger epsdiag_in, a less sparse pattern, and/or a smaller 252 droptol 253 */ 254 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L) 255 { 256 PetscInt jL; 257 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data; 258 PetscInt *ai=a->i,*aj=a->j; 259 MatScalar *aa=a->a; 260 PetscInt nrows, ncols; 261 PetscInt *max_row_nnz; 262 PetscErrorCode ierr; 263 spbas_matrix retval; 264 PetscScalar *diag; 265 PetscScalar *val; 266 PetscScalar *lvec; 267 PetscScalar epsdiag; 268 PetscInt i,j,k; 269 const PetscBool do_values = PETSC_TRUE; 270 PetscInt *r1_icol; 271 PetscScalar *r1_val; 272 PetscInt *r_icol; 273 PetscInt r_nnz; 274 PetscScalar *r_val; 275 PetscInt *A_icol; 276 PetscInt A_nnz; 277 PetscScalar *A_val; 278 PetscInt *p_icol; 279 PetscInt p_nnz; 280 PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored correctly in the matrix */ 281 PetscInt n_alloc_used = 0; /* part of result->icols and result->values which is currently being used */ 282 283 PetscFunctionBegin; 284 /* Convert the Manteuffel shift from 'fraction of average diagonal' to dimensioned value */ 285 ierr = MatGetSize(A, &nrows, &ncols);CHKERRQ(ierr); 286 ierr = MatGetTrace(A, &epsdiag);CHKERRQ(ierr); 287 288 epsdiag *= epsdiag_in / nrows; 289 290 ierr = PetscInfo2(NULL," Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag),(double)droptol);CHKERRQ(ierr); 291 292 if (droptol<1e-10) droptol=1e-10; 293 294 if ((nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n"); 295 296 retval.nrows = nrows; 297 retval.ncols = nrows; 298 retval.nnz = pattern.nnz/10; 299 retval.col_idx_type = SPBAS_COLUMN_NUMBERS; 300 retval.block_data = PETSC_TRUE; 301 302 ierr = spbas_allocate_pattern(&retval, do_values);CHKERRQ(ierr); 303 ierr = PetscArrayzero(retval.row_nnz, nrows);CHKERRQ(ierr); 304 ierr = spbas_allocate_data(&retval);CHKERRQ(ierr); 305 retval.nnz = 0; 306 307 ierr = PetscMalloc1(nrows, &diag);CHKERRQ(ierr); 308 ierr = PetscCalloc1(nrows, &val);CHKERRQ(ierr); 309 ierr = PetscCalloc1(nrows, &lvec);CHKERRQ(ierr); 310 ierr = PetscCalloc1(nrows, &max_row_nnz);CHKERRQ(ierr); 311 312 /* Check correct format of sparseness pattern */ 313 if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n"); 314 315 /* Count the nonzeros on transpose of pattern */ 316 for (i = 0; i<nrows; i++) { 317 p_nnz = pattern.row_nnz[i]; 318 p_icol = pattern.icols[i]; 319 for (j=0; j<p_nnz; j++) { 320 max_row_nnz[i+p_icol[j]]++; 321 } 322 } 323 324 /* Calculate rows of L */ 325 for (i = 0; i<nrows; i++) { 326 p_nnz = pattern.row_nnz[i]; 327 p_icol = pattern.icols[i]; 328 329 r_nnz = retval.row_nnz[i]; 330 r_icol = retval.icols[i]; 331 r_val = retval.values[i]; 332 A_nnz = ai[rip[i]+1] - ai[rip[i]]; 333 A_icol = &aj[ai[rip[i]]]; 334 A_val = &aa[ai[rip[i]]]; 335 336 /* Calculate val += A(i,i:n)'; */ 337 for (j=0; j<A_nnz; j++) { 338 k = riip[A_icol[j]]; 339 if (k>=i) val[k] = A_val[j]; 340 } 341 342 /* Add regularization */ 343 val[i] += epsdiag; 344 345 /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i); 346 val(i) = A(i,i) - L(0:i-1,i)' * lvec */ 347 for (j=0; j<r_nnz; j++) { 348 k = r_icol[j]; 349 lvec[k] = diag[k] * r_val[j]; 350 val[i] -= r_val[j] * lvec[k]; 351 } 352 353 /* Calculate the new diagonal */ 354 diag[i] = val[i]; 355 if (PetscRealPart(diag[i])<droptol) { 356 ierr = PetscInfo(NULL,"Error in spbas_incomplete_cholesky:\n");CHKERRQ(ierr); 357 ierr = PetscInfo1(NULL,"Negative diagonal in row %d\n",i+1);CHKERRQ(ierr); 358 359 /* Delete the whole matrix at once. */ 360 ierr = spbas_delete(retval);CHKERRQ(ierr); 361 return NEGATIVE_DIAGONAL; 362 } 363 364 /* If necessary, allocate arrays */ 365 if (r_nnz==0) { 366 ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used); 367 if (ierr == PETSC_ERR_MEM) { 368 ierr = spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 369 ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);CHKERRQ(ierr); 370 } else CHKERRQ(ierr); 371 r_icol = retval.icols[i]; 372 r_val = retval.values[i]; 373 } 374 375 /* Now, fill in */ 376 r_icol[r_nnz] = i; 377 r_val [r_nnz] = 1.0; 378 r_nnz++; 379 retval.row_nnz[i]++; 380 381 retval.nnz += r_nnz; 382 383 /* Calculate 384 val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */ 385 for (j=1; j<p_nnz; j++) { 386 k = i+p_icol[j]; 387 r1_icol = retval.icols[k]; 388 r1_val = retval.values[k]; 389 for (jL=0; jL<retval.row_nnz[k]; jL++) { 390 val[k] -= r1_val[jL] * lvec[r1_icol[jL]]; 391 } 392 val[k] /= diag[i]; 393 394 if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) { 395 /* If necessary, allocate arrays */ 396 if (retval.row_nnz[k]==0) { 397 ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used); 398 if (ierr == PETSC_ERR_MEM) { 399 ierr = spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 400 ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);CHKERRQ(ierr); 401 r_icol = retval.icols[i]; 402 } else CHKERRQ(ierr); 403 } 404 405 retval.icols[k][retval.row_nnz[k]] = i; 406 retval.values[k][retval.row_nnz[k]] = val[k]; 407 retval.row_nnz[k]++; 408 } 409 val[k] = 0; 410 } 411 412 /* Erase the values used in the work arrays */ 413 for (j=0; j<r_nnz; j++) lvec[r_icol[j]] = 0; 414 } 415 416 ierr = PetscFree(lvec);CHKERRQ(ierr); 417 ierr = PetscFree(val);CHKERRQ(ierr); 418 419 ierr = spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 420 ierr = PetscFree(max_row_nnz);CHKERRQ(ierr); 421 422 /* Place the inverse of the diagonals in the matrix */ 423 for (i=0; i<nrows; i++) { 424 r_nnz = retval.row_nnz[i]; 425 426 retval.values[i][r_nnz-1] = 1.0 / diag[i]; 427 for (j=0; j<r_nnz-1; j++) { 428 retval.values[i][j] *= -1; 429 } 430 } 431 ierr = PetscFree(diag);CHKERRQ(ierr); 432 *matrix_L = retval; 433 PetscFunctionReturn(0); 434 } 435