1 2 /* 3 spbas_cholesky_row_alloc: 4 in the data arrays, find a place where another row may be stored. 5 Return PETSC_ERR_MEM if there is insufficient space to store the 6 row, so garbage collection and/or re-allocation may be done. 7 */ 8 #undef __FUNCT__ 9 #define __FUNCT__ "spbas_cholesky_row_alloc" 10 PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used) 11 { 12 PetscFunctionBegin; 13 retval.icols[k] = &retval.alloc_icol[*n_alloc_used]; 14 retval.values[k] = &retval.alloc_val[*n_alloc_used]; 15 *n_alloc_used += r_nnz; 16 if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM); 17 PetscFunctionReturn(0); 18 } 19 20 21 /* 22 spbas_cholesky_garbage_collect: 23 move the rows which have been calculated so far, as well as 24 those currently under construction, to the front of the 25 array, while putting them in the proper order. 26 When it seems necessary, enlarge the current arrays. 27 28 NB: re-allocation is being simulated using 29 PetscMalloc, memcpy, PetscFree, because 30 PetscRealloc does not seem to exist. 31 32 */ 33 #undef __FUNCT__ 34 #define __FUNCT__ "spbas_cholesky_garbage_collect" 35 PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed. 36 Only the storage, not the contents of this matrix is changed in this function */ 37 PetscInt i_row, /* I : Number of rows for which the final contents are known */ 38 PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final 39 places in the arrays: they need not be moved any more */ 40 PetscInt *n_alloc_used, /* I/O: */ 41 PetscInt *max_row_nnz) /* I : Over-estimate of the number of nonzeros needed to store each row */ 42 { 43 /* PSEUDO-CODE: 44 1. Choose the appropriate size for the arrays 45 2. Rescue the arrays which would be lost during garbage collection 46 3. Reallocate and correct administration 47 4. Move all arrays so that they are in proper order */ 48 49 PetscInt i,j; 50 PetscInt nrows = result->nrows; 51 PetscInt n_alloc_ok =0; 52 PetscInt n_alloc_ok_max=0; 53 PetscErrorCode ierr; 54 PetscInt need_already = 0; 55 PetscInt n_rows_ahead =0; 56 PetscInt max_need_extra= 0; 57 PetscInt n_alloc_max, n_alloc_est, n_alloc; 58 PetscInt n_alloc_now = result->n_alloc_icol; 59 PetscInt *alloc_icol_old = result->alloc_icol; 60 PetscScalar *alloc_val_old = result->alloc_val; 61 PetscInt *icol_rescue; 62 PetscScalar *val_rescue; 63 PetscInt n_rescue; 64 PetscInt n_row_rescue; 65 PetscInt i_here, i_last, n_copy; 66 const PetscReal xtra_perc = 20; 67 68 PetscFunctionBegin; 69 /********************************************************* 70 1. Choose appropriate array size 71 Count number of rows and memory usage which is already final */ 72 for (i=0; i<i_row; i++) { 73 n_alloc_ok += result->row_nnz[i]; 74 n_alloc_ok_max += max_row_nnz[i]; 75 } 76 77 /* Count currently needed memory usage and future memory requirements 78 (max, predicted)*/ 79 for (i=i_row; i<nrows; i++) { 80 if (!result->row_nnz[i]) { 81 max_need_extra += max_row_nnz[i]; 82 } else { 83 need_already += max_row_nnz[i]; 84 n_rows_ahead++; 85 } 86 } 87 88 /* Make maximal and realistic memory requirement estimates */ 89 n_alloc_max = n_alloc_ok + need_already + max_need_extra; 90 n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) * ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max)); 91 92 /* Choose array sizes */ 93 if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max; 94 else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now; 95 else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) n_alloc = n_alloc_max; 96 else n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); 97 98 /* If new estimate is less than what we already have, 99 don't reallocate, just garbage-collect */ 100 if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) { 101 n_alloc = result->n_alloc_icol; 102 } 103 104 /* Motivate dimension choice */ 105 ierr = PetscInfo1(NULL," Allocating %d nonzeros: ",n_alloc);CHKERRQ(ierr); 106 if (n_alloc_max == n_alloc_est) { 107 ierr = PetscInfo(NULL,"this is the correct size\n");CHKERRQ(ierr); 108 } else if (n_alloc_now >= n_alloc_est) { 109 ierr = PetscInfo(NULL,"the current size, which seems enough\n");CHKERRQ(ierr); 110 } else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) { 111 ierr = PetscInfo(NULL,"the maximum estimate\n");CHKERRQ(ierr); 112 } else { 113 ierr = PetscInfo1(NULL,"%6.2f %% more than the estimate\n",xtra_perc);CHKERRQ(ierr); 114 } 115 116 117 /********************************************************** 118 2. Rescue arrays which would be lost 119 Count how many rows and nonzeros will have to be rescued 120 when moving all arrays in place */ 121 n_row_rescue = 0; n_rescue = 0; 122 if (*n_row_alloc_ok==0) *n_alloc_used = 0; 123 else { 124 i = *n_row_alloc_ok - 1; 125 126 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 127 } 128 129 for (i=*n_row_alloc_ok; i<nrows; i++) { 130 i_here = result->icols[i]-result->alloc_icol; 131 i_last = i_here + result->row_nnz[i]; 132 if (result->row_nnz[i]>0) { 133 if (*n_alloc_used > i_here || i_last > n_alloc) { 134 n_rescue += result->row_nnz[i]; 135 n_row_rescue++; 136 } 137 138 if (i<i_row) *n_alloc_used += result->row_nnz[i]; 139 else *n_alloc_used += max_row_nnz[i]; 140 } 141 } 142 143 /* Allocate rescue arrays */ 144 ierr = PetscMalloc1(n_rescue, &icol_rescue);CHKERRQ(ierr); 145 ierr = PetscMalloc1(n_rescue, &val_rescue);CHKERRQ(ierr); 146 147 /* Rescue the arrays which need rescuing */ 148 n_row_rescue = 0; n_rescue = 0; 149 if (*n_row_alloc_ok==0) *n_alloc_used = 0; 150 else { 151 i = *n_row_alloc_ok - 1; 152 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 153 } 154 155 for (i=*n_row_alloc_ok; i<nrows; i++) { 156 i_here = result->icols[i]-result->alloc_icol; 157 i_last = i_here + result->row_nnz[i]; 158 if (result->row_nnz[i]>0) { 159 if (*n_alloc_used > i_here || i_last > n_alloc) { 160 ierr = PetscMemcpy((void*) &icol_rescue[n_rescue], (void*) result->icols[i], result->row_nnz[i] * sizeof(PetscInt));CHKERRQ(ierr); 161 ierr = PetscMemcpy((void*) &val_rescue[n_rescue], (void*) result->values[i], result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr); 162 163 n_rescue += result->row_nnz[i]; 164 n_row_rescue++; 165 } 166 167 if (i<i_row) *n_alloc_used += result->row_nnz[i]; 168 else *n_alloc_used += max_row_nnz[i]; 169 } 170 } 171 172 /********************************************************** 173 3. Reallocate and correct administration */ 174 175 if (n_alloc != result->n_alloc_icol) { 176 n_copy = PetscMin(n_alloc,result->n_alloc_icol); 177 178 /* PETSC knows no REALLOC, so we'll REALLOC ourselves. 179 180 Allocate new icol-data, copy old contents */ 181 ierr = PetscMalloc1(n_alloc, &result->alloc_icol);CHKERRQ(ierr); 182 ierr = PetscMemcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(PetscInt));CHKERRQ(ierr); 183 184 /* Update administration, Reset pointers to new arrays */ 185 result->n_alloc_icol = n_alloc; 186 for (i=0; i<nrows; i++) { 187 result->icols[i] = result->alloc_icol + (result->icols[i] - alloc_icol_old); 188 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 189 } 190 191 /* Delete old array */ 192 ierr = PetscFree(alloc_icol_old);CHKERRQ(ierr); 193 194 /* Allocate new value-data, copy old contents */ 195 ierr = PetscMalloc1(n_alloc, &result->alloc_val);CHKERRQ(ierr); 196 ierr = PetscMemcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar));CHKERRQ(ierr); 197 198 /* Update administration, Reset pointers to new arrays */ 199 result->n_alloc_val = n_alloc; 200 for (i=0; i<nrows; i++) { 201 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 202 } 203 204 /* Delete old array */ 205 ierr = PetscFree(alloc_val_old);CHKERRQ(ierr); 206 } 207 208 209 /********************************************************* 210 4. Copy all the arrays to their proper places */ 211 n_row_rescue = 0; n_rescue = 0; 212 if (*n_row_alloc_ok==0) *n_alloc_used = 0; 213 else { 214 i = *n_row_alloc_ok - 1; 215 216 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 217 } 218 219 for (i=*n_row_alloc_ok; i<nrows; i++) { 220 i_here = result->icols[i]-result->alloc_icol; 221 i_last = i_here + result->row_nnz[i]; 222 223 result->icols[i] = result->alloc_icol + *n_alloc_used; 224 result->values[i]= result->alloc_val + *n_alloc_used; 225 226 if (result->row_nnz[i]>0) { 227 if (*n_alloc_used > i_here || i_last > n_alloc) { 228 ierr = PetscMemcpy((void*) result->icols[i], (void*) &icol_rescue[n_rescue], result->row_nnz[i] * sizeof(PetscInt));CHKERRQ(ierr); 229 ierr = PetscMemcpy((void*) result->values[i], (void*) &val_rescue[n_rescue],result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr); 230 231 n_rescue += result->row_nnz[i]; 232 n_row_rescue++; 233 } else { 234 for (j=0; j<result->row_nnz[i]; j++) { 235 result->icols[i][j] = result->alloc_icol[i_here+j]; 236 result->values[i][j] = result->alloc_val[i_here+j]; 237 } 238 } 239 if (i<i_row) *n_alloc_used += result->row_nnz[i]; 240 else *n_alloc_used += max_row_nnz[i]; 241 } 242 } 243 244 /* Delete the rescue arrays */ 245 ierr = PetscFree(icol_rescue);CHKERRQ(ierr); 246 ierr = PetscFree(val_rescue);CHKERRQ(ierr); 247 248 *n_row_alloc_ok = i_row; 249 PetscFunctionReturn(0); 250 } 251 252 /* 253 spbas_incomplete_cholesky: 254 incomplete Cholesky decomposition of a square, symmetric, 255 positive definite matrix. 256 257 In case negative diagonals are encountered, function returns 258 NEGATIVE_DIAGONAL. When this happens, call this function again 259 with a larger epsdiag_in, a less sparse pattern, and/or a smaller 260 droptol 261 */ 262 #undef __FUNCT__ 263 #define __FUNCT__ "spbas_incomplete_cholesky" 264 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L) 265 { 266 PetscInt jL; 267 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data; 268 PetscInt *ai=a->i,*aj=a->j; 269 MatScalar *aa=a->a; 270 PetscInt nrows, ncols; 271 PetscInt *max_row_nnz; 272 PetscErrorCode ierr; 273 spbas_matrix retval; 274 PetscScalar *diag; 275 PetscScalar *val; 276 PetscScalar *lvec; 277 PetscScalar epsdiag; 278 PetscInt i,j,k; 279 const PetscBool do_values = PETSC_TRUE; 280 PetscInt *r1_icol; 281 PetscScalar *r1_val; 282 PetscInt *r_icol; 283 PetscInt r_nnz; 284 PetscScalar *r_val; 285 PetscInt *A_icol; 286 PetscInt A_nnz; 287 PetscScalar *A_val; 288 PetscInt *p_icol; 289 PetscInt p_nnz; 290 PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored correctly in the matrix */ 291 PetscInt n_alloc_used = 0; /* part of result->icols and result->values which is currently being used */ 292 293 PetscFunctionBegin; 294 /* Convert the Manteuffel shift from 'fraction of average diagonal' to dimensioned value */ 295 ierr = MatGetSize(A, &nrows, &ncols);CHKERRQ(ierr); 296 ierr = MatGetTrace(A, &epsdiag);CHKERRQ(ierr); 297 298 epsdiag *= epsdiag_in / nrows; 299 300 ierr = PetscInfo2(NULL," Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag),(double)droptol);CHKERRQ(ierr); 301 302 if (droptol<1e-10) droptol=1e-10; 303 304 if ((nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n"); 305 306 retval.nrows = nrows; 307 retval.ncols = nrows; 308 retval.nnz = pattern.nnz/10; 309 retval.col_idx_type = SPBAS_COLUMN_NUMBERS; 310 retval.block_data = PETSC_TRUE; 311 312 ierr = spbas_allocate_pattern(&retval, do_values);CHKERRQ(ierr); 313 ierr = PetscMemzero((void*) retval.row_nnz, nrows*sizeof(PetscInt));CHKERRQ(ierr); 314 ierr = spbas_allocate_data(&retval);CHKERRQ(ierr); 315 retval.nnz = 0; 316 317 ierr = PetscMalloc1(nrows, &diag);CHKERRQ(ierr); 318 ierr = PetscMalloc1(nrows, &val);CHKERRQ(ierr); 319 ierr = PetscMalloc1(nrows, &lvec);CHKERRQ(ierr); 320 ierr = PetscMalloc1(nrows, &max_row_nnz);CHKERRQ(ierr); 321 322 ierr = PetscMemzero((void*) val, nrows*sizeof(PetscScalar));CHKERRQ(ierr); 323 ierr = PetscMemzero((void*) lvec, nrows*sizeof(PetscScalar));CHKERRQ(ierr); 324 ierr = PetscMemzero((void*) max_row_nnz, nrows*sizeof(PetscInt));CHKERRQ(ierr); 325 326 /* Check correct format of sparseness pattern */ 327 if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n"); 328 329 /* Count the nonzeros on transpose of pattern */ 330 for (i = 0; i<nrows; i++) { 331 p_nnz = pattern.row_nnz[i]; 332 p_icol = pattern.icols[i]; 333 for (j=0; j<p_nnz; j++) { 334 max_row_nnz[i+p_icol[j]]++; 335 } 336 } 337 338 /* Calculate rows of L */ 339 for (i = 0; i<nrows; i++) { 340 p_nnz = pattern.row_nnz[i]; 341 p_icol = pattern.icols[i]; 342 343 r_nnz = retval.row_nnz[i]; 344 r_icol = retval.icols[i]; 345 r_val = retval.values[i]; 346 A_nnz = ai[rip[i]+1] - ai[rip[i]]; 347 A_icol = &aj[ai[rip[i]]]; 348 A_val = &aa[ai[rip[i]]]; 349 350 /* Calculate val += A(i,i:n)'; */ 351 for (j=0; j<A_nnz; j++) { 352 k = riip[A_icol[j]]; 353 if (k>=i) val[k] = A_val[j]; 354 } 355 356 /* Add regularization */ 357 val[i] += epsdiag; 358 359 /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i); 360 val(i) = A(i,i) - L(0:i-1,i)' * lvec */ 361 for (j=0; j<r_nnz; j++) { 362 k = r_icol[j]; 363 lvec[k] = diag[k] * r_val[j]; 364 val[i] -= r_val[j] * lvec[k]; 365 } 366 367 /* Calculate the new diagonal */ 368 diag[i] = val[i]; 369 if (PetscRealPart(diag[i])<droptol) { 370 ierr = PetscInfo(NULL,"Error in spbas_incomplete_cholesky:\n");CHKERRQ(ierr); 371 ierr = PetscInfo1(NULL,"Negative diagonal in row %d\n",i+1);CHKERRQ(ierr); 372 373 /* Delete the whole matrix at once. */ 374 ierr = spbas_delete(retval);CHKERRQ(ierr); 375 return NEGATIVE_DIAGONAL; 376 } 377 378 /* If necessary, allocate arrays */ 379 if (r_nnz==0) { 380 ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used); 381 if (ierr == PETSC_ERR_MEM) { 382 ierr = spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 383 ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);CHKERRQ(ierr); 384 } else CHKERRQ(ierr); 385 r_icol = retval.icols[i]; 386 r_val = retval.values[i]; 387 } 388 389 /* Now, fill in */ 390 r_icol[r_nnz] = i; 391 r_val [r_nnz] = 1.0; 392 r_nnz++; 393 retval.row_nnz[i]++; 394 395 retval.nnz += r_nnz; 396 397 /* Calculate 398 val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */ 399 for (j=1; j<p_nnz; j++) { 400 k = i+p_icol[j]; 401 r1_icol = retval.icols[k]; 402 r1_val = retval.values[k]; 403 for (jL=0; jL<retval.row_nnz[k]; jL++) { 404 val[k] -= r1_val[jL] * lvec[r1_icol[jL]]; 405 } 406 val[k] /= diag[i]; 407 408 if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) { 409 /* If necessary, allocate arrays */ 410 if (retval.row_nnz[k]==0) { 411 ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used); 412 if (ierr == PETSC_ERR_MEM) { 413 ierr = spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 414 ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);CHKERRQ(ierr); 415 r_icol = retval.icols[i]; 416 } else CHKERRQ(ierr); 417 } 418 419 retval.icols[k][retval.row_nnz[k]] = i; 420 retval.values[k][retval.row_nnz[k]] = val[k]; 421 retval.row_nnz[k]++; 422 } 423 val[k] = 0; 424 } 425 426 /* Erase the values used in the work arrays */ 427 for (j=0; j<r_nnz; j++) lvec[r_icol[j]] = 0; 428 } 429 430 ierr=PetscFree(lvec);CHKERRQ(ierr); 431 ierr=PetscFree(val);CHKERRQ(ierr); 432 433 ierr = spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 434 ierr = PetscFree(max_row_nnz);CHKERRQ(ierr); 435 436 /* Place the inverse of the diagonals in the matrix */ 437 for (i=0; i<nrows; i++) { 438 r_nnz = retval.row_nnz[i]; 439 440 retval.values[i][r_nnz-1] = 1.0 / diag[i]; 441 for (j=0; j<r_nnz-1; j++) { 442 retval.values[i][j] *= -1; 443 } 444 } 445 ierr = PetscFree(diag);CHKERRQ(ierr); 446 *matrix_L = retval; 447 PetscFunctionReturn(0); 448 } 449 450