1 2 /* 3 spbas_cholesky_row_alloc: 4 in the data arrays, find a place where another row may be stored. 5 Return PETSC_ERR_MEM if there is insufficient space to store the 6 row, so garbage collection and/or re-allocation may be done. 7 */ 8 PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used) 9 { 10 PetscFunctionBegin; 11 retval.icols[k] = &retval.alloc_icol[*n_alloc_used]; 12 retval.values[k] = &retval.alloc_val[*n_alloc_used]; 13 *n_alloc_used += r_nnz; 14 if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM); 15 PetscFunctionReturn(0); 16 } 17 18 19 /* 20 spbas_cholesky_garbage_collect: 21 move the rows which have been calculated so far, as well as 22 those currently under construction, to the front of the 23 array, while putting them in the proper order. 24 When it seems necessary, enlarge the current arrays. 25 26 NB: re-allocation is being simulated using 27 PetscMalloc, memcpy, PetscFree, because 28 PetscRealloc does not seem to exist. 29 30 */ 31 PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed. 32 Only the storage, not the contents of this matrix is changed in this function */ 33 PetscInt i_row, /* I : Number of rows for which the final contents are known */ 34 PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final 35 places in the arrays: they need not be moved any more */ 36 PetscInt *n_alloc_used, /* I/O: */ 37 PetscInt *max_row_nnz) /* I : Over-estimate of the number of nonzeros needed to store each row */ 38 { 39 /* PSEUDO-CODE: 40 1. Choose the appropriate size for the arrays 41 2. Rescue the arrays which would be lost during garbage collection 42 3. Reallocate and correct administration 43 4. Move all arrays so that they are in proper order */ 44 45 PetscInt i,j; 46 PetscInt nrows = result->nrows; 47 PetscInt n_alloc_ok =0; 48 PetscInt n_alloc_ok_max=0; 49 PetscErrorCode ierr; 50 PetscInt need_already = 0; 51 PetscInt n_rows_ahead =0; 52 PetscInt max_need_extra= 0; 53 PetscInt n_alloc_max, n_alloc_est, n_alloc; 54 PetscInt n_alloc_now = result->n_alloc_icol; 55 PetscInt *alloc_icol_old = result->alloc_icol; 56 PetscScalar *alloc_val_old = result->alloc_val; 57 PetscInt *icol_rescue; 58 PetscScalar *val_rescue; 59 PetscInt n_rescue; 60 PetscInt n_row_rescue; 61 PetscInt i_here, i_last, n_copy; 62 const PetscReal xtra_perc = 20; 63 64 PetscFunctionBegin; 65 /********************************************************* 66 1. Choose appropriate array size 67 Count number of rows and memory usage which is already final */ 68 for (i=0; i<i_row; i++) { 69 n_alloc_ok += result->row_nnz[i]; 70 n_alloc_ok_max += max_row_nnz[i]; 71 } 72 73 /* Count currently needed memory usage and future memory requirements 74 (max, predicted)*/ 75 for (i=i_row; i<nrows; i++) { 76 if (!result->row_nnz[i]) { 77 max_need_extra += max_row_nnz[i]; 78 } else { 79 need_already += max_row_nnz[i]; 80 n_rows_ahead++; 81 } 82 } 83 84 /* Make maximal and realistic memory requirement estimates */ 85 n_alloc_max = n_alloc_ok + need_already + max_need_extra; 86 n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) * ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max)); 87 88 /* Choose array sizes */ 89 if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max; 90 else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now; 91 else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) n_alloc = n_alloc_max; 92 else n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); 93 94 /* If new estimate is less than what we already have, 95 don't reallocate, just garbage-collect */ 96 if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) { 97 n_alloc = result->n_alloc_icol; 98 } 99 100 /* Motivate dimension choice */ 101 ierr = PetscInfo1(NULL," Allocating %d nonzeros: ",n_alloc);CHKERRQ(ierr); 102 if (n_alloc_max == n_alloc_est) { 103 ierr = PetscInfo(NULL,"this is the correct size\n");CHKERRQ(ierr); 104 } else if (n_alloc_now >= n_alloc_est) { 105 ierr = PetscInfo(NULL,"the current size, which seems enough\n");CHKERRQ(ierr); 106 } else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) { 107 ierr = PetscInfo(NULL,"the maximum estimate\n");CHKERRQ(ierr); 108 } else { 109 ierr = PetscInfo1(NULL,"%6.2f %% more than the estimate\n",xtra_perc);CHKERRQ(ierr); 110 } 111 112 113 /********************************************************** 114 2. Rescue arrays which would be lost 115 Count how many rows and nonzeros will have to be rescued 116 when moving all arrays in place */ 117 n_row_rescue = 0; n_rescue = 0; 118 if (*n_row_alloc_ok==0) *n_alloc_used = 0; 119 else { 120 i = *n_row_alloc_ok - 1; 121 122 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 123 } 124 125 for (i=*n_row_alloc_ok; i<nrows; i++) { 126 i_here = result->icols[i]-result->alloc_icol; 127 i_last = i_here + result->row_nnz[i]; 128 if (result->row_nnz[i]>0) { 129 if (*n_alloc_used > i_here || i_last > n_alloc) { 130 n_rescue += result->row_nnz[i]; 131 n_row_rescue++; 132 } 133 134 if (i<i_row) *n_alloc_used += result->row_nnz[i]; 135 else *n_alloc_used += max_row_nnz[i]; 136 } 137 } 138 139 /* Allocate rescue arrays */ 140 ierr = PetscMalloc1(n_rescue, &icol_rescue);CHKERRQ(ierr); 141 ierr = PetscMalloc1(n_rescue, &val_rescue);CHKERRQ(ierr); 142 143 /* Rescue the arrays which need rescuing */ 144 n_row_rescue = 0; n_rescue = 0; 145 if (*n_row_alloc_ok==0) *n_alloc_used = 0; 146 else { 147 i = *n_row_alloc_ok - 1; 148 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 149 } 150 151 for (i=*n_row_alloc_ok; i<nrows; i++) { 152 i_here = result->icols[i]-result->alloc_icol; 153 i_last = i_here + result->row_nnz[i]; 154 if (result->row_nnz[i]>0) { 155 if (*n_alloc_used > i_here || i_last > n_alloc) { 156 ierr = PetscMemcpy((void*) &icol_rescue[n_rescue], (void*) result->icols[i], result->row_nnz[i] * sizeof(PetscInt));CHKERRQ(ierr); 157 ierr = PetscMemcpy((void*) &val_rescue[n_rescue], (void*) result->values[i], result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr); 158 159 n_rescue += result->row_nnz[i]; 160 n_row_rescue++; 161 } 162 163 if (i<i_row) *n_alloc_used += result->row_nnz[i]; 164 else *n_alloc_used += max_row_nnz[i]; 165 } 166 } 167 168 /********************************************************** 169 3. Reallocate and correct administration */ 170 171 if (n_alloc != result->n_alloc_icol) { 172 n_copy = PetscMin(n_alloc,result->n_alloc_icol); 173 174 /* PETSC knows no REALLOC, so we'll REALLOC ourselves. 175 176 Allocate new icol-data, copy old contents */ 177 ierr = PetscMalloc1(n_alloc, &result->alloc_icol);CHKERRQ(ierr); 178 ierr = PetscMemcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(PetscInt));CHKERRQ(ierr); 179 180 /* Update administration, Reset pointers to new arrays */ 181 result->n_alloc_icol = n_alloc; 182 for (i=0; i<nrows; i++) { 183 result->icols[i] = result->alloc_icol + (result->icols[i] - alloc_icol_old); 184 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 185 } 186 187 /* Delete old array */ 188 ierr = PetscFree(alloc_icol_old);CHKERRQ(ierr); 189 190 /* Allocate new value-data, copy old contents */ 191 ierr = PetscMalloc1(n_alloc, &result->alloc_val);CHKERRQ(ierr); 192 ierr = PetscMemcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar));CHKERRQ(ierr); 193 194 /* Update administration, Reset pointers to new arrays */ 195 result->n_alloc_val = n_alloc; 196 for (i=0; i<nrows; i++) { 197 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 198 } 199 200 /* Delete old array */ 201 ierr = PetscFree(alloc_val_old);CHKERRQ(ierr); 202 } 203 204 205 /********************************************************* 206 4. Copy all the arrays to their proper places */ 207 n_row_rescue = 0; n_rescue = 0; 208 if (*n_row_alloc_ok==0) *n_alloc_used = 0; 209 else { 210 i = *n_row_alloc_ok - 1; 211 212 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 213 } 214 215 for (i=*n_row_alloc_ok; i<nrows; i++) { 216 i_here = result->icols[i]-result->alloc_icol; 217 i_last = i_here + result->row_nnz[i]; 218 219 result->icols[i] = result->alloc_icol + *n_alloc_used; 220 result->values[i]= result->alloc_val + *n_alloc_used; 221 222 if (result->row_nnz[i]>0) { 223 if (*n_alloc_used > i_here || i_last > n_alloc) { 224 ierr = PetscMemcpy((void*) result->icols[i], (void*) &icol_rescue[n_rescue], result->row_nnz[i] * sizeof(PetscInt));CHKERRQ(ierr); 225 ierr = PetscMemcpy((void*) result->values[i], (void*) &val_rescue[n_rescue],result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr); 226 227 n_rescue += result->row_nnz[i]; 228 n_row_rescue++; 229 } else { 230 for (j=0; j<result->row_nnz[i]; j++) { 231 result->icols[i][j] = result->alloc_icol[i_here+j]; 232 result->values[i][j] = result->alloc_val[i_here+j]; 233 } 234 } 235 if (i<i_row) *n_alloc_used += result->row_nnz[i]; 236 else *n_alloc_used += max_row_nnz[i]; 237 } 238 } 239 240 /* Delete the rescue arrays */ 241 ierr = PetscFree(icol_rescue);CHKERRQ(ierr); 242 ierr = PetscFree(val_rescue);CHKERRQ(ierr); 243 244 *n_row_alloc_ok = i_row; 245 PetscFunctionReturn(0); 246 } 247 248 /* 249 spbas_incomplete_cholesky: 250 incomplete Cholesky decomposition of a square, symmetric, 251 positive definite matrix. 252 253 In case negative diagonals are encountered, function returns 254 NEGATIVE_DIAGONAL. When this happens, call this function again 255 with a larger epsdiag_in, a less sparse pattern, and/or a smaller 256 droptol 257 */ 258 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L) 259 { 260 PetscInt jL; 261 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data; 262 PetscInt *ai=a->i,*aj=a->j; 263 MatScalar *aa=a->a; 264 PetscInt nrows, ncols; 265 PetscInt *max_row_nnz; 266 PetscErrorCode ierr; 267 spbas_matrix retval; 268 PetscScalar *diag; 269 PetscScalar *val; 270 PetscScalar *lvec; 271 PetscScalar epsdiag; 272 PetscInt i,j,k; 273 const PetscBool do_values = PETSC_TRUE; 274 PetscInt *r1_icol; 275 PetscScalar *r1_val; 276 PetscInt *r_icol; 277 PetscInt r_nnz; 278 PetscScalar *r_val; 279 PetscInt *A_icol; 280 PetscInt A_nnz; 281 PetscScalar *A_val; 282 PetscInt *p_icol; 283 PetscInt p_nnz; 284 PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored correctly in the matrix */ 285 PetscInt n_alloc_used = 0; /* part of result->icols and result->values which is currently being used */ 286 287 PetscFunctionBegin; 288 /* Convert the Manteuffel shift from 'fraction of average diagonal' to dimensioned value */ 289 ierr = MatGetSize(A, &nrows, &ncols);CHKERRQ(ierr); 290 ierr = MatGetTrace(A, &epsdiag);CHKERRQ(ierr); 291 292 epsdiag *= epsdiag_in / nrows; 293 294 ierr = PetscInfo2(NULL," Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag),(double)droptol);CHKERRQ(ierr); 295 296 if (droptol<1e-10) droptol=1e-10; 297 298 if ((nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n"); 299 300 retval.nrows = nrows; 301 retval.ncols = nrows; 302 retval.nnz = pattern.nnz/10; 303 retval.col_idx_type = SPBAS_COLUMN_NUMBERS; 304 retval.block_data = PETSC_TRUE; 305 306 ierr = spbas_allocate_pattern(&retval, do_values);CHKERRQ(ierr); 307 ierr = PetscMemzero((void*) retval.row_nnz, nrows*sizeof(PetscInt));CHKERRQ(ierr); 308 ierr = spbas_allocate_data(&retval);CHKERRQ(ierr); 309 retval.nnz = 0; 310 311 ierr = PetscMalloc1(nrows, &diag);CHKERRQ(ierr); 312 ierr = PetscMalloc1(nrows, &val);CHKERRQ(ierr); 313 ierr = PetscMalloc1(nrows, &lvec);CHKERRQ(ierr); 314 ierr = PetscMalloc1(nrows, &max_row_nnz);CHKERRQ(ierr); 315 316 ierr = PetscMemzero((void*) val, nrows*sizeof(PetscScalar));CHKERRQ(ierr); 317 ierr = PetscMemzero((void*) lvec, nrows*sizeof(PetscScalar));CHKERRQ(ierr); 318 ierr = PetscMemzero((void*) max_row_nnz, nrows*sizeof(PetscInt));CHKERRQ(ierr); 319 320 /* Check correct format of sparseness pattern */ 321 if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n"); 322 323 /* Count the nonzeros on transpose of pattern */ 324 for (i = 0; i<nrows; i++) { 325 p_nnz = pattern.row_nnz[i]; 326 p_icol = pattern.icols[i]; 327 for (j=0; j<p_nnz; j++) { 328 max_row_nnz[i+p_icol[j]]++; 329 } 330 } 331 332 /* Calculate rows of L */ 333 for (i = 0; i<nrows; i++) { 334 p_nnz = pattern.row_nnz[i]; 335 p_icol = pattern.icols[i]; 336 337 r_nnz = retval.row_nnz[i]; 338 r_icol = retval.icols[i]; 339 r_val = retval.values[i]; 340 A_nnz = ai[rip[i]+1] - ai[rip[i]]; 341 A_icol = &aj[ai[rip[i]]]; 342 A_val = &aa[ai[rip[i]]]; 343 344 /* Calculate val += A(i,i:n)'; */ 345 for (j=0; j<A_nnz; j++) { 346 k = riip[A_icol[j]]; 347 if (k>=i) val[k] = A_val[j]; 348 } 349 350 /* Add regularization */ 351 val[i] += epsdiag; 352 353 /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i); 354 val(i) = A(i,i) - L(0:i-1,i)' * lvec */ 355 for (j=0; j<r_nnz; j++) { 356 k = r_icol[j]; 357 lvec[k] = diag[k] * r_val[j]; 358 val[i] -= r_val[j] * lvec[k]; 359 } 360 361 /* Calculate the new diagonal */ 362 diag[i] = val[i]; 363 if (PetscRealPart(diag[i])<droptol) { 364 ierr = PetscInfo(NULL,"Error in spbas_incomplete_cholesky:\n");CHKERRQ(ierr); 365 ierr = PetscInfo1(NULL,"Negative diagonal in row %d\n",i+1);CHKERRQ(ierr); 366 367 /* Delete the whole matrix at once. */ 368 ierr = spbas_delete(retval);CHKERRQ(ierr); 369 return NEGATIVE_DIAGONAL; 370 } 371 372 /* If necessary, allocate arrays */ 373 if (r_nnz==0) { 374 ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used); 375 if (ierr == PETSC_ERR_MEM) { 376 ierr = spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 377 ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);CHKERRQ(ierr); 378 } else CHKERRQ(ierr); 379 r_icol = retval.icols[i]; 380 r_val = retval.values[i]; 381 } 382 383 /* Now, fill in */ 384 r_icol[r_nnz] = i; 385 r_val [r_nnz] = 1.0; 386 r_nnz++; 387 retval.row_nnz[i]++; 388 389 retval.nnz += r_nnz; 390 391 /* Calculate 392 val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */ 393 for (j=1; j<p_nnz; j++) { 394 k = i+p_icol[j]; 395 r1_icol = retval.icols[k]; 396 r1_val = retval.values[k]; 397 for (jL=0; jL<retval.row_nnz[k]; jL++) { 398 val[k] -= r1_val[jL] * lvec[r1_icol[jL]]; 399 } 400 val[k] /= diag[i]; 401 402 if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) { 403 /* If necessary, allocate arrays */ 404 if (retval.row_nnz[k]==0) { 405 ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used); 406 if (ierr == PETSC_ERR_MEM) { 407 ierr = spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 408 ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);CHKERRQ(ierr); 409 r_icol = retval.icols[i]; 410 } else CHKERRQ(ierr); 411 } 412 413 retval.icols[k][retval.row_nnz[k]] = i; 414 retval.values[k][retval.row_nnz[k]] = val[k]; 415 retval.row_nnz[k]++; 416 } 417 val[k] = 0; 418 } 419 420 /* Erase the values used in the work arrays */ 421 for (j=0; j<r_nnz; j++) lvec[r_icol[j]] = 0; 422 } 423 424 ierr=PetscFree(lvec);CHKERRQ(ierr); 425 ierr=PetscFree(val);CHKERRQ(ierr); 426 427 ierr = spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 428 ierr = PetscFree(max_row_nnz);CHKERRQ(ierr); 429 430 /* Place the inverse of the diagonals in the matrix */ 431 for (i=0; i<nrows; i++) { 432 r_nnz = retval.row_nnz[i]; 433 434 retval.values[i][r_nnz-1] = 1.0 / diag[i]; 435 for (j=0; j<r_nnz-1; j++) { 436 retval.values[i][j] *= -1; 437 } 438 } 439 ierr = PetscFree(diag);CHKERRQ(ierr); 440 *matrix_L = retval; 441 PetscFunctionReturn(0); 442 } 443 444