1 2 /* 3 spbas_cholesky_row_alloc: 4 in the data arrays, find a place where another row may be stored. 5 Return PETSC_ERR_MEM if there is insufficient space to store the 6 row, so garbage collection and/or re-allocation may be done. 7 */ 8 PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used) 9 { 10 PetscFunctionBegin; 11 retval.icols[k] = &retval.alloc_icol[*n_alloc_used]; 12 retval.values[k] = &retval.alloc_val[*n_alloc_used]; 13 *n_alloc_used += r_nnz; 14 if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM); 15 PetscFunctionReturn(0); 16 } 17 18 19 /* 20 spbas_cholesky_garbage_collect: 21 move the rows which have been calculated so far, as well as 22 those currently under construction, to the front of the 23 array, while putting them in the proper order. 24 When it seems necessary, enlarge the current arrays. 25 26 NB: re-allocation is being simulated using 27 PetscMalloc, memcpy, PetscFree, because 28 PetscRealloc does not seem to exist. 29 30 */ 31 PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed. 32 Only the storage, not the contents of this matrix is changed in this function */ 33 PetscInt i_row, /* I : Number of rows for which the final contents are known */ 34 PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final 35 places in the arrays: they need not be moved any more */ 36 PetscInt *n_alloc_used, /* I/O: */ 37 PetscInt *max_row_nnz) /* I : Over-estimate of the number of nonzeros needed to store each row */ 38 { 39 /* PSEUDO-CODE: 40 1. Choose the appropriate size for the arrays 41 2. Rescue the arrays which would be lost during garbage collection 42 3. Reallocate and correct administration 43 4. Move all arrays so that they are in proper order */ 44 45 PetscInt i,j; 46 PetscInt nrows = result->nrows; 47 PetscInt n_alloc_ok =0; 48 PetscInt n_alloc_ok_max=0; 49 PetscErrorCode ierr; 50 PetscInt need_already = 0; 51 PetscInt n_rows_ahead =0; 52 PetscInt max_need_extra= 0; 53 PetscInt n_alloc_max, n_alloc_est, n_alloc; 54 PetscInt n_alloc_now = result->n_alloc_icol; 55 PetscInt *alloc_icol_old = result->alloc_icol; 56 PetscScalar *alloc_val_old = result->alloc_val; 57 PetscInt *icol_rescue; 58 PetscScalar *val_rescue; 59 PetscInt n_rescue; 60 PetscInt n_row_rescue; 61 PetscInt i_here, i_last, n_copy; 62 const PetscReal xtra_perc = 20; 63 64 PetscFunctionBegin; 65 /********************************************************* 66 1. Choose appropriate array size 67 Count number of rows and memory usage which is already final */ 68 for (i=0; i<i_row; i++) { 69 n_alloc_ok += result->row_nnz[i]; 70 n_alloc_ok_max += max_row_nnz[i]; 71 } 72 73 /* Count currently needed memory usage and future memory requirements 74 (max, predicted)*/ 75 for (i=i_row; i<nrows; i++) { 76 if (!result->row_nnz[i]) { 77 max_need_extra += max_row_nnz[i]; 78 } else { 79 need_already += max_row_nnz[i]; 80 n_rows_ahead++; 81 } 82 } 83 84 /* Make maximal and realistic memory requirement estimates */ 85 n_alloc_max = n_alloc_ok + need_already + max_need_extra; 86 n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) * ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max)); 87 88 /* Choose array sizes */ 89 if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max; 90 else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now; 91 else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) n_alloc = n_alloc_max; 92 else n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); 93 94 /* If new estimate is less than what we already have, 95 don't reallocate, just garbage-collect */ 96 if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) { 97 n_alloc = result->n_alloc_icol; 98 } 99 100 /* Motivate dimension choice */ 101 ierr = PetscInfo1(NULL," Allocating %d nonzeros: ",n_alloc);CHKERRQ(ierr); 102 if (n_alloc_max == n_alloc_est) { 103 ierr = PetscInfo(NULL,"this is the correct size\n");CHKERRQ(ierr); 104 } else if (n_alloc_now >= n_alloc_est) { 105 ierr = PetscInfo(NULL,"the current size, which seems enough\n");CHKERRQ(ierr); 106 } else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) { 107 ierr = PetscInfo(NULL,"the maximum estimate\n");CHKERRQ(ierr); 108 } else { 109 ierr = PetscInfo1(NULL,"%6.2f %% more than the estimate\n",xtra_perc);CHKERRQ(ierr); 110 } 111 112 113 /********************************************************** 114 2. Rescue arrays which would be lost 115 Count how many rows and nonzeros will have to be rescued 116 when moving all arrays in place */ 117 n_row_rescue = 0; n_rescue = 0; 118 if (*n_row_alloc_ok==0) *n_alloc_used = 0; 119 else { 120 i = *n_row_alloc_ok - 1; 121 122 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 123 } 124 125 for (i=*n_row_alloc_ok; i<nrows; i++) { 126 i_here = result->icols[i]-result->alloc_icol; 127 i_last = i_here + result->row_nnz[i]; 128 if (result->row_nnz[i]>0) { 129 if (*n_alloc_used > i_here || i_last > n_alloc) { 130 n_rescue += result->row_nnz[i]; 131 n_row_rescue++; 132 } 133 134 if (i<i_row) *n_alloc_used += result->row_nnz[i]; 135 else *n_alloc_used += max_row_nnz[i]; 136 } 137 } 138 139 /* Allocate rescue arrays */ 140 ierr = PetscMalloc1(n_rescue, &icol_rescue);CHKERRQ(ierr); 141 ierr = PetscMalloc1(n_rescue, &val_rescue);CHKERRQ(ierr); 142 143 /* Rescue the arrays which need rescuing */ 144 n_row_rescue = 0; n_rescue = 0; 145 if (*n_row_alloc_ok==0) *n_alloc_used = 0; 146 else { 147 i = *n_row_alloc_ok - 1; 148 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 149 } 150 151 for (i=*n_row_alloc_ok; i<nrows; i++) { 152 i_here = result->icols[i]-result->alloc_icol; 153 i_last = i_here + result->row_nnz[i]; 154 if (result->row_nnz[i]>0) { 155 if (*n_alloc_used > i_here || i_last > n_alloc) { 156 ierr = PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i]);CHKERRQ(ierr); 157 ierr = PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i]);CHKERRQ(ierr); 158 n_rescue += result->row_nnz[i]; 159 n_row_rescue++; 160 } 161 162 if (i<i_row) *n_alloc_used += result->row_nnz[i]; 163 else *n_alloc_used += max_row_nnz[i]; 164 } 165 } 166 167 /********************************************************** 168 3. Reallocate and correct administration */ 169 170 if (n_alloc != result->n_alloc_icol) { 171 n_copy = PetscMin(n_alloc,result->n_alloc_icol); 172 173 /* PETSC knows no REALLOC, so we'll REALLOC ourselves. 174 175 Allocate new icol-data, copy old contents */ 176 ierr = PetscMalloc1(n_alloc, &result->alloc_icol);CHKERRQ(ierr); 177 ierr = PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy);CHKERRQ(ierr); 178 179 /* Update administration, Reset pointers to new arrays */ 180 result->n_alloc_icol = n_alloc; 181 for (i=0; i<nrows; i++) { 182 result->icols[i] = result->alloc_icol + (result->icols[i] - alloc_icol_old); 183 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 184 } 185 186 /* Delete old array */ 187 ierr = PetscFree(alloc_icol_old);CHKERRQ(ierr); 188 189 /* Allocate new value-data, copy old contents */ 190 ierr = PetscMalloc1(n_alloc, &result->alloc_val);CHKERRQ(ierr); 191 ierr = PetscArraycpy(result->alloc_val, alloc_val_old, n_copy);CHKERRQ(ierr); 192 193 /* Update administration, Reset pointers to new arrays */ 194 result->n_alloc_val = n_alloc; 195 for (i=0; i<nrows; i++) { 196 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 197 } 198 199 /* Delete old array */ 200 ierr = PetscFree(alloc_val_old);CHKERRQ(ierr); 201 } 202 203 204 /********************************************************* 205 4. Copy all the arrays to their proper places */ 206 n_row_rescue = 0; n_rescue = 0; 207 if (*n_row_alloc_ok==0) *n_alloc_used = 0; 208 else { 209 i = *n_row_alloc_ok - 1; 210 211 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 212 } 213 214 for (i=*n_row_alloc_ok; i<nrows; i++) { 215 i_here = result->icols[i]-result->alloc_icol; 216 i_last = i_here + result->row_nnz[i]; 217 218 result->icols[i] = result->alloc_icol + *n_alloc_used; 219 result->values[i]= result->alloc_val + *n_alloc_used; 220 221 if (result->row_nnz[i]>0) { 222 if (*n_alloc_used > i_here || i_last > n_alloc) { 223 ierr = PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i]);CHKERRQ(ierr); 224 ierr = PetscArraycpy(result->values[i],&val_rescue[n_rescue],result->row_nnz[i]);CHKERRQ(ierr); 225 226 n_rescue += result->row_nnz[i]; 227 n_row_rescue++; 228 } else { 229 for (j=0; j<result->row_nnz[i]; j++) { 230 result->icols[i][j] = result->alloc_icol[i_here+j]; 231 result->values[i][j] = result->alloc_val[i_here+j]; 232 } 233 } 234 if (i<i_row) *n_alloc_used += result->row_nnz[i]; 235 else *n_alloc_used += max_row_nnz[i]; 236 } 237 } 238 239 /* Delete the rescue arrays */ 240 ierr = PetscFree(icol_rescue);CHKERRQ(ierr); 241 ierr = PetscFree(val_rescue);CHKERRQ(ierr); 242 243 *n_row_alloc_ok = i_row; 244 PetscFunctionReturn(0); 245 } 246 247 /* 248 spbas_incomplete_cholesky: 249 incomplete Cholesky decomposition of a square, symmetric, 250 positive definite matrix. 251 252 In case negative diagonals are encountered, function returns 253 NEGATIVE_DIAGONAL. When this happens, call this function again 254 with a larger epsdiag_in, a less sparse pattern, and/or a smaller 255 droptol 256 */ 257 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L) 258 { 259 PetscInt jL; 260 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data; 261 PetscInt *ai=a->i,*aj=a->j; 262 MatScalar *aa=a->a; 263 PetscInt nrows, ncols; 264 PetscInt *max_row_nnz; 265 PetscErrorCode ierr; 266 spbas_matrix retval; 267 PetscScalar *diag; 268 PetscScalar *val; 269 PetscScalar *lvec; 270 PetscScalar epsdiag; 271 PetscInt i,j,k; 272 const PetscBool do_values = PETSC_TRUE; 273 PetscInt *r1_icol; 274 PetscScalar *r1_val; 275 PetscInt *r_icol; 276 PetscInt r_nnz; 277 PetscScalar *r_val; 278 PetscInt *A_icol; 279 PetscInt A_nnz; 280 PetscScalar *A_val; 281 PetscInt *p_icol; 282 PetscInt p_nnz; 283 PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored correctly in the matrix */ 284 PetscInt n_alloc_used = 0; /* part of result->icols and result->values which is currently being used */ 285 286 PetscFunctionBegin; 287 /* Convert the Manteuffel shift from 'fraction of average diagonal' to dimensioned value */ 288 ierr = MatGetSize(A, &nrows, &ncols);CHKERRQ(ierr); 289 ierr = MatGetTrace(A, &epsdiag);CHKERRQ(ierr); 290 291 epsdiag *= epsdiag_in / nrows; 292 293 ierr = PetscInfo2(NULL," Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag),(double)droptol);CHKERRQ(ierr); 294 295 if (droptol<1e-10) droptol=1e-10; 296 297 if ((nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n"); 298 299 retval.nrows = nrows; 300 retval.ncols = nrows; 301 retval.nnz = pattern.nnz/10; 302 retval.col_idx_type = SPBAS_COLUMN_NUMBERS; 303 retval.block_data = PETSC_TRUE; 304 305 ierr = spbas_allocate_pattern(&retval, do_values);CHKERRQ(ierr); 306 ierr = PetscArrayzero(retval.row_nnz, nrows);CHKERRQ(ierr); 307 ierr = spbas_allocate_data(&retval);CHKERRQ(ierr); 308 retval.nnz = 0; 309 310 ierr = PetscMalloc1(nrows, &diag);CHKERRQ(ierr); 311 ierr = PetscCalloc1(nrows, &val);CHKERRQ(ierr); 312 ierr = PetscCalloc1(nrows, &lvec);CHKERRQ(ierr); 313 ierr = PetscCalloc1(nrows, &max_row_nnz);CHKERRQ(ierr); 314 315 /* Check correct format of sparseness pattern */ 316 if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n"); 317 318 /* Count the nonzeros on transpose of pattern */ 319 for (i = 0; i<nrows; i++) { 320 p_nnz = pattern.row_nnz[i]; 321 p_icol = pattern.icols[i]; 322 for (j=0; j<p_nnz; j++) { 323 max_row_nnz[i+p_icol[j]]++; 324 } 325 } 326 327 /* Calculate rows of L */ 328 for (i = 0; i<nrows; i++) { 329 p_nnz = pattern.row_nnz[i]; 330 p_icol = pattern.icols[i]; 331 332 r_nnz = retval.row_nnz[i]; 333 r_icol = retval.icols[i]; 334 r_val = retval.values[i]; 335 A_nnz = ai[rip[i]+1] - ai[rip[i]]; 336 A_icol = &aj[ai[rip[i]]]; 337 A_val = &aa[ai[rip[i]]]; 338 339 /* Calculate val += A(i,i:n)'; */ 340 for (j=0; j<A_nnz; j++) { 341 k = riip[A_icol[j]]; 342 if (k>=i) val[k] = A_val[j]; 343 } 344 345 /* Add regularization */ 346 val[i] += epsdiag; 347 348 /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i); 349 val(i) = A(i,i) - L(0:i-1,i)' * lvec */ 350 for (j=0; j<r_nnz; j++) { 351 k = r_icol[j]; 352 lvec[k] = diag[k] * r_val[j]; 353 val[i] -= r_val[j] * lvec[k]; 354 } 355 356 /* Calculate the new diagonal */ 357 diag[i] = val[i]; 358 if (PetscRealPart(diag[i])<droptol) { 359 ierr = PetscInfo(NULL,"Error in spbas_incomplete_cholesky:\n");CHKERRQ(ierr); 360 ierr = PetscInfo1(NULL,"Negative diagonal in row %d\n",i+1);CHKERRQ(ierr); 361 362 /* Delete the whole matrix at once. */ 363 ierr = spbas_delete(retval);CHKERRQ(ierr); 364 return NEGATIVE_DIAGONAL; 365 } 366 367 /* If necessary, allocate arrays */ 368 if (r_nnz==0) { 369 ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used); 370 if (ierr == PETSC_ERR_MEM) { 371 ierr = spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 372 ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);CHKERRQ(ierr); 373 } else CHKERRQ(ierr); 374 r_icol = retval.icols[i]; 375 r_val = retval.values[i]; 376 } 377 378 /* Now, fill in */ 379 r_icol[r_nnz] = i; 380 r_val [r_nnz] = 1.0; 381 r_nnz++; 382 retval.row_nnz[i]++; 383 384 retval.nnz += r_nnz; 385 386 /* Calculate 387 val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */ 388 for (j=1; j<p_nnz; j++) { 389 k = i+p_icol[j]; 390 r1_icol = retval.icols[k]; 391 r1_val = retval.values[k]; 392 for (jL=0; jL<retval.row_nnz[k]; jL++) { 393 val[k] -= r1_val[jL] * lvec[r1_icol[jL]]; 394 } 395 val[k] /= diag[i]; 396 397 if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) { 398 /* If necessary, allocate arrays */ 399 if (retval.row_nnz[k]==0) { 400 ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used); 401 if (ierr == PETSC_ERR_MEM) { 402 ierr = spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 403 ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);CHKERRQ(ierr); 404 r_icol = retval.icols[i]; 405 } else CHKERRQ(ierr); 406 } 407 408 retval.icols[k][retval.row_nnz[k]] = i; 409 retval.values[k][retval.row_nnz[k]] = val[k]; 410 retval.row_nnz[k]++; 411 } 412 val[k] = 0; 413 } 414 415 /* Erase the values used in the work arrays */ 416 for (j=0; j<r_nnz; j++) lvec[r_icol[j]] = 0; 417 } 418 419 ierr=PetscFree(lvec);CHKERRQ(ierr); 420 ierr=PetscFree(val);CHKERRQ(ierr); 421 422 ierr = spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 423 ierr = PetscFree(max_row_nnz);CHKERRQ(ierr); 424 425 /* Place the inverse of the diagonals in the matrix */ 426 for (i=0; i<nrows; i++) { 427 r_nnz = retval.row_nnz[i]; 428 429 retval.values[i][r_nnz-1] = 1.0 / diag[i]; 430 for (j=0; j<r_nnz-1; j++) { 431 retval.values[i][j] *= -1; 432 } 433 } 434 ierr = PetscFree(diag);CHKERRQ(ierr); 435 *matrix_L = retval; 436 PetscFunctionReturn(0); 437 } 438 439