1 2 /* 3 spbas_cholesky_row_alloc: 4 in the data arrays, find a place where another row may be stored. 5 Return PETSC_ERR_MEM if there is insufficient space to store the 6 row, so garbage collection and/or re-allocation may be done. 7 */ 8 PetscBool spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used) 9 { 10 PetscFunctionBegin; 11 retval.icols[k] = &retval.alloc_icol[*n_alloc_used]; 12 retval.values[k] = &retval.alloc_val[*n_alloc_used]; 13 *n_alloc_used += r_nnz; 14 if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_FALSE); 15 PetscFunctionReturn(PETSC_TRUE); 16 } 17 18 /* 19 spbas_cholesky_garbage_collect: 20 move the rows which have been calculated so far, as well as 21 those currently under construction, to the front of the 22 array, while putting them in the proper order. 23 When it seems necessary, enlarge the current arrays. 24 25 NB: re-allocation is being simulated using 26 PetscMalloc, memcpy, PetscFree, because 27 PetscRealloc does not seem to exist. 28 29 */ 30 PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed. 31 Only the storage, not the contents of this matrix is changed in this function */ 32 PetscInt i_row, /* I : Number of rows for which the final contents are known */ 33 PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final 34 places in the arrays: they need not be moved any more */ 35 PetscInt *n_alloc_used, /* I/O: */ 36 PetscInt *max_row_nnz) /* I : Over-estimate of the number of nonzeros needed to store each row */ 37 { 38 /* PSEUDO-CODE: 39 1. Choose the appropriate size for the arrays 40 2. Rescue the arrays which would be lost during garbage collection 41 3. Reallocate and correct administration 42 4. Move all arrays so that they are in proper order */ 43 44 PetscInt i,j; 45 PetscInt nrows = result->nrows; 46 PetscInt n_alloc_ok =0; 47 PetscInt n_alloc_ok_max=0; 48 PetscErrorCode ierr; 49 PetscInt need_already = 0; 50 PetscInt n_rows_ahead =0; 51 PetscInt max_need_extra= 0; 52 PetscInt n_alloc_max, n_alloc_est, n_alloc; 53 PetscInt n_alloc_now = result->n_alloc_icol; 54 PetscInt *alloc_icol_old = result->alloc_icol; 55 PetscScalar *alloc_val_old = result->alloc_val; 56 PetscInt *icol_rescue; 57 PetscScalar *val_rescue; 58 PetscInt n_rescue; 59 PetscInt n_row_rescue; 60 PetscInt i_here, i_last, n_copy; 61 const PetscReal xtra_perc = 20; 62 63 PetscFunctionBegin; 64 /********************************************************* 65 1. Choose appropriate array size 66 Count number of rows and memory usage which is already final */ 67 for (i=0; i<i_row; i++) { 68 n_alloc_ok += result->row_nnz[i]; 69 n_alloc_ok_max += max_row_nnz[i]; 70 } 71 72 /* Count currently needed memory usage and future memory requirements 73 (max, predicted)*/ 74 for (i=i_row; i<nrows; i++) { 75 if (!result->row_nnz[i]) { 76 max_need_extra += max_row_nnz[i]; 77 } else { 78 need_already += max_row_nnz[i]; 79 n_rows_ahead++; 80 } 81 } 82 83 /* Make maximal and realistic memory requirement estimates */ 84 n_alloc_max = n_alloc_ok + need_already + max_need_extra; 85 n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) * ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max)); 86 87 /* Choose array sizes */ 88 if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max; 89 else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now; 90 else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) n_alloc = n_alloc_max; 91 else n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); 92 93 /* If new estimate is less than what we already have, 94 don't reallocate, just garbage-collect */ 95 if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) { 96 n_alloc = result->n_alloc_icol; 97 } 98 99 /* Motivate dimension choice */ 100 ierr = PetscInfo1(NULL," Allocating %d nonzeros: ",n_alloc);CHKERRQ(ierr); 101 if (n_alloc_max == n_alloc_est) { 102 ierr = PetscInfo(NULL,"this is the correct size\n");CHKERRQ(ierr); 103 } else if (n_alloc_now >= n_alloc_est) { 104 ierr = PetscInfo(NULL,"the current size, which seems enough\n");CHKERRQ(ierr); 105 } else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) { 106 ierr = PetscInfo(NULL,"the maximum estimate\n");CHKERRQ(ierr); 107 } else { 108 ierr = PetscInfo1(NULL,"%6.2f %% more than the estimate\n",xtra_perc);CHKERRQ(ierr); 109 } 110 111 /********************************************************** 112 2. Rescue arrays which would be lost 113 Count how many rows and nonzeros will have to be rescued 114 when moving all arrays in place */ 115 n_row_rescue = 0; n_rescue = 0; 116 if (*n_row_alloc_ok==0) *n_alloc_used = 0; 117 else { 118 i = *n_row_alloc_ok - 1; 119 120 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 121 } 122 123 for (i=*n_row_alloc_ok; i<nrows; i++) { 124 i_here = result->icols[i]-result->alloc_icol; 125 i_last = i_here + result->row_nnz[i]; 126 if (result->row_nnz[i]>0) { 127 if (*n_alloc_used > i_here || i_last > n_alloc) { 128 n_rescue += result->row_nnz[i]; 129 n_row_rescue++; 130 } 131 132 if (i<i_row) *n_alloc_used += result->row_nnz[i]; 133 else *n_alloc_used += max_row_nnz[i]; 134 } 135 } 136 137 /* Allocate rescue arrays */ 138 ierr = PetscMalloc1(n_rescue, &icol_rescue);CHKERRQ(ierr); 139 ierr = PetscMalloc1(n_rescue, &val_rescue);CHKERRQ(ierr); 140 141 /* Rescue the arrays which need rescuing */ 142 n_row_rescue = 0; n_rescue = 0; 143 if (*n_row_alloc_ok==0) *n_alloc_used = 0; 144 else { 145 i = *n_row_alloc_ok - 1; 146 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 147 } 148 149 for (i=*n_row_alloc_ok; i<nrows; i++) { 150 i_here = result->icols[i]-result->alloc_icol; 151 i_last = i_here + result->row_nnz[i]; 152 if (result->row_nnz[i]>0) { 153 if (*n_alloc_used > i_here || i_last > n_alloc) { 154 ierr = PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i]);CHKERRQ(ierr); 155 ierr = PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i]);CHKERRQ(ierr); 156 n_rescue += result->row_nnz[i]; 157 n_row_rescue++; 158 } 159 160 if (i<i_row) *n_alloc_used += result->row_nnz[i]; 161 else *n_alloc_used += max_row_nnz[i]; 162 } 163 } 164 165 /********************************************************** 166 3. Reallocate and correct administration */ 167 168 if (n_alloc != result->n_alloc_icol) { 169 n_copy = PetscMin(n_alloc,result->n_alloc_icol); 170 171 /* PETSC knows no REALLOC, so we'll REALLOC ourselves. 172 173 Allocate new icol-data, copy old contents */ 174 ierr = PetscMalloc1(n_alloc, &result->alloc_icol);CHKERRQ(ierr); 175 ierr = PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy);CHKERRQ(ierr); 176 177 /* Update administration, Reset pointers to new arrays */ 178 result->n_alloc_icol = n_alloc; 179 for (i=0; i<nrows; i++) { 180 result->icols[i] = result->alloc_icol + (result->icols[i] - alloc_icol_old); 181 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 182 } 183 184 /* Delete old array */ 185 ierr = PetscFree(alloc_icol_old);CHKERRQ(ierr); 186 187 /* Allocate new value-data, copy old contents */ 188 ierr = PetscMalloc1(n_alloc, &result->alloc_val);CHKERRQ(ierr); 189 ierr = PetscArraycpy(result->alloc_val, alloc_val_old, n_copy);CHKERRQ(ierr); 190 191 /* Update administration, Reset pointers to new arrays */ 192 result->n_alloc_val = n_alloc; 193 for (i=0; i<nrows; i++) { 194 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 195 } 196 197 /* Delete old array */ 198 ierr = PetscFree(alloc_val_old);CHKERRQ(ierr); 199 } 200 201 /********************************************************* 202 4. Copy all the arrays to their proper places */ 203 n_row_rescue = 0; n_rescue = 0; 204 if (*n_row_alloc_ok==0) *n_alloc_used = 0; 205 else { 206 i = *n_row_alloc_ok - 1; 207 208 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 209 } 210 211 for (i=*n_row_alloc_ok; i<nrows; i++) { 212 i_here = result->icols[i]-result->alloc_icol; 213 i_last = i_here + result->row_nnz[i]; 214 215 result->icols[i] = result->alloc_icol + *n_alloc_used; 216 result->values[i]= result->alloc_val + *n_alloc_used; 217 218 if (result->row_nnz[i]>0) { 219 if (*n_alloc_used > i_here || i_last > n_alloc) { 220 ierr = PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i]);CHKERRQ(ierr); 221 ierr = PetscArraycpy(result->values[i],&val_rescue[n_rescue],result->row_nnz[i]);CHKERRQ(ierr); 222 223 n_rescue += result->row_nnz[i]; 224 n_row_rescue++; 225 } else { 226 for (j=0; j<result->row_nnz[i]; j++) { 227 result->icols[i][j] = result->alloc_icol[i_here+j]; 228 result->values[i][j] = result->alloc_val[i_here+j]; 229 } 230 } 231 if (i<i_row) *n_alloc_used += result->row_nnz[i]; 232 else *n_alloc_used += max_row_nnz[i]; 233 } 234 } 235 236 /* Delete the rescue arrays */ 237 ierr = PetscFree(icol_rescue);CHKERRQ(ierr); 238 ierr = PetscFree(val_rescue);CHKERRQ(ierr); 239 240 *n_row_alloc_ok = i_row; 241 PetscFunctionReturn(0); 242 } 243 244 /* 245 spbas_incomplete_cholesky: 246 incomplete Cholesky decomposition of a square, symmetric, 247 positive definite matrix. 248 249 In case negative diagonals are encountered, function returns 250 NEGATIVE_DIAGONAL. When this happens, call this function again 251 with a larger epsdiag_in, a less sparse pattern, and/or a smaller 252 droptol 253 */ 254 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L,PetscBool *success) 255 { 256 PetscInt jL; 257 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data; 258 PetscInt *ai=a->i,*aj=a->j; 259 MatScalar *aa=a->a; 260 PetscInt nrows, ncols; 261 PetscInt *max_row_nnz; 262 PetscErrorCode ierr; 263 spbas_matrix retval; 264 PetscScalar *diag; 265 PetscScalar *val; 266 PetscScalar *lvec; 267 PetscScalar epsdiag; 268 PetscInt i,j,k; 269 const PetscBool do_values = PETSC_TRUE; 270 PetscInt *r1_icol; 271 PetscScalar *r1_val; 272 PetscInt *r_icol; 273 PetscInt r_nnz; 274 PetscScalar *r_val; 275 PetscInt *A_icol; 276 PetscInt A_nnz; 277 PetscScalar *A_val; 278 PetscInt *p_icol; 279 PetscInt p_nnz; 280 PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored correctly in the matrix */ 281 PetscInt n_alloc_used = 0; /* part of result->icols and result->values which is currently being used */ 282 283 PetscFunctionBegin; 284 /* Convert the Manteuffel shift from 'fraction of average diagonal' to dimensioned value */ 285 ierr = MatGetSize(A, &nrows, &ncols);CHKERRQ(ierr); 286 ierr = MatGetTrace(A, &epsdiag);CHKERRQ(ierr); 287 288 epsdiag *= epsdiag_in / nrows; 289 290 ierr = PetscInfo2(NULL," Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag),(double)droptol);CHKERRQ(ierr); 291 292 if (droptol<1e-10) droptol=1e-10; 293 294 retval.nrows = nrows; 295 retval.ncols = nrows; 296 retval.nnz = pattern.nnz/10; 297 retval.col_idx_type = SPBAS_COLUMN_NUMBERS; 298 retval.block_data = PETSC_TRUE; 299 300 ierr = spbas_allocate_pattern(&retval, do_values);CHKERRQ(ierr); 301 ierr = PetscArrayzero(retval.row_nnz, nrows);CHKERRQ(ierr); 302 ierr = spbas_allocate_data(&retval);CHKERRQ(ierr); 303 retval.nnz = 0; 304 305 ierr = PetscMalloc1(nrows, &diag);CHKERRQ(ierr); 306 ierr = PetscCalloc1(nrows, &val);CHKERRQ(ierr); 307 ierr = PetscCalloc1(nrows, &lvec);CHKERRQ(ierr); 308 ierr = PetscCalloc1(nrows, &max_row_nnz);CHKERRQ(ierr); 309 310 /* Count the nonzeros on transpose of pattern */ 311 for (i = 0; i<nrows; i++) { 312 p_nnz = pattern.row_nnz[i]; 313 p_icol = pattern.icols[i]; 314 for (j=0; j<p_nnz; j++) { 315 max_row_nnz[i+p_icol[j]]++; 316 } 317 } 318 319 /* Calculate rows of L */ 320 for (i = 0; i<nrows; i++) { 321 p_nnz = pattern.row_nnz[i]; 322 p_icol = pattern.icols[i]; 323 324 r_nnz = retval.row_nnz[i]; 325 r_icol = retval.icols[i]; 326 r_val = retval.values[i]; 327 A_nnz = ai[rip[i]+1] - ai[rip[i]]; 328 A_icol = &aj[ai[rip[i]]]; 329 A_val = &aa[ai[rip[i]]]; 330 331 /* Calculate val += A(i,i:n)'; */ 332 for (j=0; j<A_nnz; j++) { 333 k = riip[A_icol[j]]; 334 if (k>=i) val[k] = A_val[j]; 335 } 336 337 /* Add regularization */ 338 val[i] += epsdiag; 339 340 /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i); 341 val(i) = A(i,i) - L(0:i-1,i)' * lvec */ 342 for (j=0; j<r_nnz; j++) { 343 k = r_icol[j]; 344 lvec[k] = diag[k] * r_val[j]; 345 val[i] -= r_val[j] * lvec[k]; 346 } 347 348 /* Calculate the new diagonal */ 349 diag[i] = val[i]; 350 if (PetscRealPart(diag[i])<droptol) { 351 ierr = PetscInfo(NULL,"Error in spbas_incomplete_cholesky:\n");CHKERRQ(ierr); 352 ierr = PetscInfo1(NULL,"Negative diagonal in row %d\n",i+1);CHKERRQ(ierr); 353 354 /* Delete the whole matrix at once. */ 355 ierr = spbas_delete(retval);CHKERRQ(ierr); 356 *success = PETSC_FALSE; 357 PetscFunctionReturn(0); 358 } 359 360 /* If necessary, allocate arrays */ 361 if (r_nnz==0) { 362 PetscBool success = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used); 363 if (!success) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM,"spbas_cholesky_row_alloc() failed"); 364 r_icol = retval.icols[i]; 365 r_val = retval.values[i]; 366 } 367 368 /* Now, fill in */ 369 r_icol[r_nnz] = i; 370 r_val [r_nnz] = 1.0; 371 r_nnz++; 372 retval.row_nnz[i]++; 373 374 retval.nnz += r_nnz; 375 376 /* Calculate 377 val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */ 378 for (j=1; j<p_nnz; j++) { 379 k = i+p_icol[j]; 380 r1_icol = retval.icols[k]; 381 r1_val = retval.values[k]; 382 for (jL=0; jL<retval.row_nnz[k]; jL++) { 383 val[k] -= r1_val[jL] * lvec[r1_icol[jL]]; 384 } 385 val[k] /= diag[i]; 386 387 if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) { 388 /* If necessary, allocate arrays */ 389 if (!retval.row_nnz[k]) { 390 PetscBool flag,success = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used); 391 if (!success) { 392 ierr = spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 393 flag = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used); 394 if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM,"Allocation in spbas_cholesky_row_alloc() failed"); 395 r_icol = retval.icols[i]; 396 } 397 } 398 399 retval.icols[k][retval.row_nnz[k]] = i; 400 retval.values[k][retval.row_nnz[k]] = val[k]; 401 retval.row_nnz[k]++; 402 } 403 val[k] = 0; 404 } 405 406 /* Erase the values used in the work arrays */ 407 for (j=0; j<r_nnz; j++) lvec[r_icol[j]] = 0; 408 } 409 410 ierr = PetscFree(lvec);CHKERRQ(ierr); 411 ierr = PetscFree(val);CHKERRQ(ierr); 412 413 ierr = spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 414 ierr = PetscFree(max_row_nnz);CHKERRQ(ierr); 415 416 /* Place the inverse of the diagonals in the matrix */ 417 for (i=0; i<nrows; i++) { 418 r_nnz = retval.row_nnz[i]; 419 420 retval.values[i][r_nnz-1] = 1.0 / diag[i]; 421 for (j=0; j<r_nnz-1; j++) { 422 retval.values[i][j] *= -1; 423 } 424 } 425 ierr = PetscFree(diag);CHKERRQ(ierr); 426 *matrix_L = retval; 427 *success = PETSC_TRUE; 428 PetscFunctionReturn(0); 429 } 430