1 2 /* 3 spbas_cholesky_row_alloc: 4 in the data arrays, find a place where another row may be stored. 5 Return PETSC_ERR_MEM if there is insufficient space to store the 6 row, so garbage collection and/or re-allocation may be done. 7 */ 8 #undef __FUNCT__ 9 #define __FUNCT__ "spbas_cholesky_row_alloc" 10 PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used) 11 { 12 PetscFunctionBegin; 13 retval.icols[k] = &retval.alloc_icol[*n_alloc_used]; 14 retval.values[k] = &retval.alloc_val[*n_alloc_used]; 15 *n_alloc_used += r_nnz; 16 if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM); 17 PetscFunctionReturn(0); 18 } 19 20 21 /* 22 spbas_cholesky_garbage_collect: 23 move the rows which have been calculated so far, as well as 24 those currently under construction, to the front of the 25 array, while putting them in the proper order. 26 When it seems necessary, enlarge the current arrays. 27 28 NB: re-allocation is being simulated using 29 PetscMalloc, memcpy, PetscFree, because 30 PetscRealloc does not seem to exist. 31 32 */ 33 #undef __FUNCT__ 34 #define __FUNCT__ "spbas_cholesky_garbage_collect" 35 PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed. 36 Only the storage, not the contents of this matrix is changed in this function */ 37 PetscInt i_row, /* I : Number of rows for which the final contents are known */ 38 PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final 39 places in the arrays: they need not be moved any more */ 40 PetscInt *n_alloc_used, /* I/O: */ 41 PetscInt *max_row_nnz) /* I : Over-estimate of the number of nonzeros needed to store each row */ 42 { 43 /* PSEUDO-CODE: 44 1. Choose the appropriate size for the arrays 45 2. Rescue the arrays which would be lost during garbage collection 46 3. Reallocate and correct administration 47 4. Move all arrays so that they are in proper order */ 48 49 PetscInt i,j; 50 PetscInt nrows = result->nrows; 51 PetscInt n_alloc_ok =0; 52 PetscInt n_alloc_ok_max=0; 53 PetscErrorCode ierr; 54 PetscInt need_already = 0; 55 PetscInt n_rows_ahead =0; 56 PetscInt max_need_extra= 0; 57 PetscInt n_alloc_max, n_alloc_est, n_alloc; 58 PetscInt n_alloc_now = result->n_alloc_icol; 59 PetscInt *alloc_icol_old = result->alloc_icol; 60 PetscScalar *alloc_val_old = result->alloc_val; 61 PetscInt *icol_rescue; 62 PetscScalar *val_rescue; 63 PetscInt n_rescue; 64 PetscInt n_row_rescue; 65 PetscInt i_here, i_last, n_copy; 66 const PetscReal xtra_perc = 20; 67 68 PetscFunctionBegin; 69 /********************************************************* 70 1. Choose appropriate array size 71 Count number of rows and memory usage which is already final */ 72 for (i=0; i<i_row; i++) { 73 n_alloc_ok += result->row_nnz[i]; 74 n_alloc_ok_max += max_row_nnz[i]; 75 } 76 77 /* Count currently needed memory usage and future memory requirements 78 (max, predicted)*/ 79 for (i=i_row; i<nrows; i++) { 80 if (!result->row_nnz[i]) { 81 max_need_extra += max_row_nnz[i]; 82 } else { 83 need_already += max_row_nnz[i]; 84 n_rows_ahead++; 85 } 86 } 87 88 /* Make maximal and realistic memory requirement estimates */ 89 n_alloc_max = n_alloc_ok + need_already + max_need_extra; 90 n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) * ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max)); 91 92 /* Choose array sizes */ 93 if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max; 94 else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now; 95 else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) n_alloc = n_alloc_max; 96 else n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); 97 98 /* If new estimate is less than what we already have, 99 don't reallocate, just garbage-collect */ 100 if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) { 101 n_alloc = result->n_alloc_icol; 102 } 103 104 /* Motivate dimension choice */ 105 ierr = PetscInfo1(NULL," Allocating %d nonzeros: ",n_alloc);CHKERRQ(ierr); 106 if (n_alloc_max == n_alloc_est) { 107 ierr = PetscInfo(NULL,"this is the correct size\n");CHKERRQ(ierr); 108 } else if (n_alloc_now >= n_alloc_est) { 109 ierr = PetscInfo(NULL,"the current size, which seems enough\n");CHKERRQ(ierr); 110 } else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) { 111 ierr = PetscInfo(NULL,"the maximum estimate\n");CHKERRQ(ierr); 112 } else { 113 ierr = PetscInfo1(NULL,"%6.2f %% more than the estimate\n",xtra_perc);CHKERRQ(ierr); 114 } 115 116 117 /********************************************************** 118 2. Rescue arrays which would be lost 119 Count how many rows and nonzeros will have to be rescued 120 when moving all arrays in place */ 121 n_row_rescue = 0; n_rescue = 0; 122 if (*n_row_alloc_ok==0) *n_alloc_used = 0; 123 else { 124 i = *n_row_alloc_ok - 1; 125 126 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 127 } 128 129 for (i=*n_row_alloc_ok; i<nrows; i++) { 130 i_here = result->icols[i]-result->alloc_icol; 131 i_last = i_here + result->row_nnz[i]; 132 if (result->row_nnz[i]>0) { 133 if (*n_alloc_used > i_here || i_last > n_alloc) { 134 n_rescue += result->row_nnz[i]; 135 n_row_rescue++; 136 } 137 138 if (i<i_row) *n_alloc_used += result->row_nnz[i]; 139 else *n_alloc_used += max_row_nnz[i]; 140 } 141 } 142 143 /* Allocate rescue arrays */ 144 ierr = PetscMalloc1(n_rescue, &icol_rescue);CHKERRQ(ierr); 145 ierr = PetscMalloc1(n_rescue, &val_rescue);CHKERRQ(ierr); 146 147 /* Rescue the arrays which need rescuing */ 148 n_row_rescue = 0; n_rescue = 0; 149 if (*n_row_alloc_ok==0) *n_alloc_used = 0; 150 else { 151 i = *n_row_alloc_ok - 1; 152 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 153 } 154 155 for (i=*n_row_alloc_ok; i<nrows; i++) { 156 i_here = result->icols[i]-result->alloc_icol; 157 i_last = i_here + result->row_nnz[i]; 158 if (result->row_nnz[i]>0) { 159 if (*n_alloc_used > i_here || i_last > n_alloc) { 160 ierr = PetscMemcpy((void*) &icol_rescue[n_rescue], (void*) result->icols[i], result->row_nnz[i] * sizeof(int));CHKERRQ(ierr); 161 ierr = PetscMemcpy((void*) &val_rescue[n_rescue], (void*) result->values[i], result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr); 162 163 n_rescue += result->row_nnz[i]; 164 n_row_rescue++; 165 } 166 167 if (i<i_row) *n_alloc_used += result->row_nnz[i]; 168 else *n_alloc_used += max_row_nnz[i]; 169 } 170 } 171 172 /********************************************************** 173 3. Reallocate and correct administration */ 174 175 if (n_alloc != result->n_alloc_icol) { 176 n_copy = PetscMin(n_alloc,result->n_alloc_icol); 177 178 /* PETSC knows no REALLOC, so we'll REALLOC ourselves. 179 180 Allocate new icol-data, copy old contents */ 181 ierr = PetscMalloc1(n_alloc, &result->alloc_icol);CHKERRQ(ierr); 182 ierr = PetscMemcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(int));CHKERRQ(ierr); 183 184 /* Update administration, Reset pointers to new arrays */ 185 result->n_alloc_icol = n_alloc; 186 for (i=0; i<nrows; i++) { 187 result->icols[i] = result->alloc_icol + (result->icols[i] - alloc_icol_old); 188 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 189 } 190 191 /* Delete old array */ 192 ierr = PetscFree(alloc_icol_old);CHKERRQ(ierr); 193 194 /* Allocate new value-data, copy old contents */ 195 ierr = PetscMalloc1(n_alloc, &result->alloc_val);CHKERRQ(ierr); 196 ierr = PetscMemcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar));CHKERRQ(ierr); 197 198 /* Update administration, Reset pointers to new arrays */ 199 result->n_alloc_val = n_alloc; 200 for (i=0; i<nrows; i++) { 201 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 202 } 203 204 /* Delete old array */ 205 ierr = PetscFree(alloc_val_old);CHKERRQ(ierr); 206 } 207 208 209 /********************************************************* 210 4. Copy all the arrays to their proper places */ 211 n_row_rescue = 0; n_rescue = 0; 212 if (*n_row_alloc_ok==0) *n_alloc_used = 0; 213 else { 214 i = *n_row_alloc_ok - 1; 215 216 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 217 } 218 219 for (i=*n_row_alloc_ok; i<nrows; i++) { 220 i_here = result->icols[i]-result->alloc_icol; 221 i_last = i_here + result->row_nnz[i]; 222 223 result->icols[i] = result->alloc_icol + *n_alloc_used; 224 result->values[i]= result->alloc_val + *n_alloc_used; 225 226 if (result->row_nnz[i]>0) { 227 if (*n_alloc_used > i_here || i_last > n_alloc) { 228 ierr = PetscMemcpy((void*) result->icols[i], (void*) &icol_rescue[n_rescue], result->row_nnz[i] * sizeof(int));CHKERRQ(ierr); 229 ierr = PetscMemcpy((void*) result->values[i], (void*) &val_rescue[n_rescue],result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr); 230 231 n_rescue += result->row_nnz[i]; 232 n_row_rescue++; 233 } else { 234 for (j=0; j<result->row_nnz[i]; j++) { 235 result->icols[i][j] = result->alloc_icol[i_here+j]; 236 result->values[i][j] = result->alloc_val[i_here+j]; 237 } 238 } 239 if (i<i_row) *n_alloc_used += result->row_nnz[i]; 240 else *n_alloc_used += max_row_nnz[i]; 241 } 242 } 243 244 /* Delete the rescue arrays */ 245 ierr = PetscFree(icol_rescue);CHKERRQ(ierr); 246 ierr = PetscFree(val_rescue);CHKERRQ(ierr); 247 248 *n_row_alloc_ok = i_row; 249 PetscFunctionReturn(0); 250 } 251 252 /* 253 spbas_incomplete_cholesky: 254 incomplete Cholesky decomposition of a square, symmetric, 255 positive definite matrix. 256 257 In case negative diagonals are encountered, function returns 258 NEGATIVE_DIAGONAL. When this happens, call this function again 259 with a larger epsdiag_in, a less sparse pattern, and/or a smaller 260 droptol 261 */ 262 #undef __FUNCT__ 263 #define __FUNCT__ "spbas_incomplete_cholesky" 264 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L) 265 { 266 PetscInt jL; 267 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data; 268 PetscInt *ai=a->i,*aj=a->j; 269 MatScalar *aa=a->a; 270 PetscInt nrows, ncols; 271 PetscInt *max_row_nnz; 272 PetscErrorCode ierr; 273 spbas_matrix retval; 274 PetscScalar *diag; 275 PetscScalar *val; 276 PetscScalar *lvec; 277 PetscScalar epsdiag; 278 PetscInt i,j,k; 279 const PetscBool do_values = PETSC_TRUE; 280 PetscInt *r1_icol; 281 PetscScalar *r1_val; 282 PetscInt *r_icol; 283 PetscInt r_nnz; 284 PetscScalar *r_val; 285 PetscInt *A_icol; 286 PetscInt A_nnz; 287 PetscScalar *A_val; 288 PetscInt *p_icol; 289 PetscInt p_nnz; 290 PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored correctly in the matrix */ 291 PetscInt n_alloc_used = 0; /* part of result->icols and result->values which is currently being used */ 292 293 PetscFunctionBegin; 294 /* Convert the Manteuffel shift from 'fraction of average diagonal' to dimensioned value */ 295 ierr = MatGetSize(A, &nrows, &ncols);CHKERRQ(ierr); 296 ierr = MatGetTrace(A, &epsdiag);CHKERRQ(ierr); 297 298 epsdiag *= epsdiag_in / nrows; 299 300 ierr = PetscInfo2(NULL," Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag),(double)droptol);CHKERRQ(ierr); 301 302 if (droptol<1e-10) droptol=1e-10; 303 304 if ((nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n"); 305 306 retval.nrows = nrows; 307 retval.ncols = nrows; 308 retval.nnz = pattern.nnz/10; 309 retval.col_idx_type = SPBAS_COLUMN_NUMBERS; 310 retval.block_data = PETSC_TRUE; 311 312 ierr = spbas_allocate_pattern(&retval, do_values);CHKERRQ(ierr); 313 ierr = PetscMemzero((void*) retval.row_nnz, nrows*sizeof(int));CHKERRQ(ierr); 314 ierr = spbas_allocate_data(&retval);CHKERRQ(ierr); 315 retval.nnz = 0; 316 317 ierr = PetscMalloc1(nrows, &diag);CHKERRQ(ierr); 318 ierr = PetscMalloc1(nrows, &val);CHKERRQ(ierr); 319 ierr = PetscMalloc1(nrows, &lvec);CHKERRQ(ierr); 320 ierr = PetscMalloc1(nrows, &max_row_nnz);CHKERRQ(ierr); 321 if (!(diag && val && lvec && max_row_nnz)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MEM, "Allocation error in spbas_incomplete_cholesky\n"); 322 323 ierr = PetscMemzero((void*) val, nrows*sizeof(PetscScalar));CHKERRQ(ierr); 324 ierr = PetscMemzero((void*) lvec, nrows*sizeof(PetscScalar));CHKERRQ(ierr); 325 ierr = PetscMemzero((void*) max_row_nnz, nrows*sizeof(int));CHKERRQ(ierr); 326 327 /* Check correct format of sparseness pattern */ 328 if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n"); 329 330 /* Count the nonzeros on transpose of pattern */ 331 for (i = 0; i<nrows; i++) { 332 p_nnz = pattern.row_nnz[i]; 333 p_icol = pattern.icols[i]; 334 for (j=0; j<p_nnz; j++) { 335 max_row_nnz[i+p_icol[j]]++; 336 } 337 } 338 339 /* Calculate rows of L */ 340 for (i = 0; i<nrows; i++) { 341 p_nnz = pattern.row_nnz[i]; 342 p_icol = pattern.icols[i]; 343 344 r_nnz = retval.row_nnz[i]; 345 r_icol = retval.icols[i]; 346 r_val = retval.values[i]; 347 A_nnz = ai[rip[i]+1] - ai[rip[i]]; 348 A_icol = &aj[ai[rip[i]]]; 349 A_val = &aa[ai[rip[i]]]; 350 351 /* Calculate val += A(i,i:n)'; */ 352 for (j=0; j<A_nnz; j++) { 353 k = riip[A_icol[j]]; 354 if (k>=i) val[k] = A_val[j]; 355 } 356 357 /* Add regularization */ 358 val[i] += epsdiag; 359 360 /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i); 361 val(i) = A(i,i) - L(0:i-1,i)' * lvec */ 362 for (j=0; j<r_nnz; j++) { 363 k = r_icol[j]; 364 lvec[k] = diag[k] * r_val[j]; 365 val[i] -= r_val[j] * lvec[k]; 366 } 367 368 /* Calculate the new diagonal */ 369 diag[i] = val[i]; 370 if (PetscRealPart(diag[i])<droptol) { 371 ierr = PetscInfo(NULL,"Error in spbas_incomplete_cholesky:\n");CHKERRQ(ierr); 372 ierr = PetscInfo1(NULL,"Negative diagonal in row %d\n",i+1);CHKERRQ(ierr); 373 374 /* Delete the whole matrix at once. */ 375 ierr = spbas_delete(retval);CHKERRQ(ierr); 376 return NEGATIVE_DIAGONAL; 377 } 378 379 /* If necessary, allocate arrays */ 380 if (r_nnz==0) { 381 ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used); 382 if (ierr == PETSC_ERR_MEM) { 383 ierr = spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 384 ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);CHKERRQ(ierr); 385 } else CHKERRQ(ierr); 386 r_icol = retval.icols[i]; 387 r_val = retval.values[i]; 388 } 389 390 /* Now, fill in */ 391 r_icol[r_nnz] = i; 392 r_val [r_nnz] = 1.0; 393 r_nnz++; 394 retval.row_nnz[i]++; 395 396 retval.nnz += r_nnz; 397 398 /* Calculate 399 val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */ 400 for (j=1; j<p_nnz; j++) { 401 k = i+p_icol[j]; 402 r1_icol = retval.icols[k]; 403 r1_val = retval.values[k]; 404 for (jL=0; jL<retval.row_nnz[k]; jL++) { 405 val[k] -= r1_val[jL] * lvec[r1_icol[jL]]; 406 } 407 val[k] /= diag[i]; 408 409 if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) { 410 /* If necessary, allocate arrays */ 411 if (retval.row_nnz[k]==0) { 412 ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used); 413 if (ierr == PETSC_ERR_MEM) { 414 ierr = spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 415 ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);CHKERRQ(ierr); 416 r_icol = retval.icols[i]; 417 r_val = retval.values[i]; 418 } else CHKERRQ(ierr); 419 } 420 421 retval.icols[k][retval.row_nnz[k]] = i; 422 retval.values[k][retval.row_nnz[k]] = val[k]; 423 retval.row_nnz[k]++; 424 } 425 val[k] = 0; 426 } 427 428 /* Erase the values used in the work arrays */ 429 for (j=0; j<r_nnz; j++) lvec[r_icol[j]] = 0; 430 } 431 432 ierr=PetscFree(lvec);CHKERRQ(ierr); 433 ierr=PetscFree(val);CHKERRQ(ierr); 434 435 ierr = spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 436 ierr = PetscFree(max_row_nnz);CHKERRQ(ierr); 437 438 /* Place the inverse of the diagonals in the matrix */ 439 for (i=0; i<nrows; i++) { 440 r_nnz = retval.row_nnz[i]; 441 442 retval.values[i][r_nnz-1] = 1.0 / diag[i]; 443 for (j=0; j<r_nnz-1; j++) { 444 retval.values[i][j] *= -1; 445 } 446 } 447 ierr = PetscFree(diag);CHKERRQ(ierr); 448 *matrix_L = retval; 449 PetscFunctionReturn(0); 450 } 451 452