1 2 /* 3 spbas_cholesky_row_alloc: 4 in the data arrays, find a place where another row may be stored. 5 Return PETSC_ERR_MEM if there is insufficient space to store the 6 row, so garbage collection and/or re-allocation may be done. 7 */ 8 #undef __FUNCT__ 9 #define __FUNCT__ "spbas_cholesky_row_alloc" 10 PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used) 11 { 12 PetscFunctionBegin; 13 retval.icols[k] = &retval.alloc_icol[*n_alloc_used]; 14 retval.values[k] = &retval.alloc_val[*n_alloc_used]; 15 *n_alloc_used += r_nnz; 16 if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM); 17 PetscFunctionReturn(0); 18 } 19 20 21 /* 22 spbas_cholesky_garbage_collect: 23 move the rows which have been calculated so far, as well as 24 those currently under construction, to the front of the 25 array, while putting them in the proper order. 26 When it seems necessary, enlarge the current arrays. 27 28 NB: re-allocation is being simulated using 29 PetscMalloc, memcpy, PetscFree, because 30 PetscRealloc does not seem to exist. 31 32 */ 33 #undef __FUNCT__ 34 #define __FUNCT__ "spbas_cholesky_garbage_collect" 35 PetscErrorCode spbas_cholesky_garbage_collect( 36 spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed. 37 Only the storage, not the contents of this matrix 38 is changed in this function */ 39 PetscInt i_row, /* I : Number of rows for which the final contents are 40 known */ 41 PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final 42 places in the arrays: they need not be moved any more */ 43 PetscInt *n_alloc_used, /* I/O: */ 44 PetscInt *max_row_nnz /*I : Over-estimate of the number of nonzeros needed to 45 store each row */ 46 ) 47 { 48 49 50 /* PSEUDO-CODE: 51 1. Choose the appropriate size for the arrays 52 2. Rescue the arrays which would be lost during garbage collection 53 3. Reallocate and correct administration 54 4. Move all arrays so that they are in proper order */ 55 56 PetscInt i,j; 57 PetscInt nrows = result->nrows; 58 PetscInt n_alloc_ok=0; 59 PetscInt n_alloc_ok_max=0; 60 PetscErrorCode ierr; 61 PetscInt need_already= 0; 62 PetscInt n_rows_ahead=0; 63 PetscInt max_need_extra= 0; 64 PetscInt n_alloc_max, n_alloc_est, n_alloc; 65 PetscInt n_alloc_now = result->n_alloc_icol; 66 PetscInt *alloc_icol_old = result->alloc_icol; 67 PetscScalar *alloc_val_old = result->alloc_val; 68 PetscInt *icol_rescue; 69 PetscScalar *val_rescue; 70 PetscInt n_rescue; 71 PetscInt n_row_rescue; 72 PetscInt i_here, i_last, n_copy; 73 const PetscReal xtra_perc = 20; 74 75 PetscFunctionBegin; 76 77 /********************************************************* 78 1. Choose appropriate array size 79 Count number of rows and memory usage which is already final */ 80 for (i=0;i<i_row; i++) { 81 n_alloc_ok += result->row_nnz[i]; 82 n_alloc_ok_max += max_row_nnz[i]; 83 } 84 85 /* Count currently needed memory usage and future memory requirements 86 (max, predicted)*/ 87 for (i=i_row; i<nrows; i++) { 88 if (!result->row_nnz[i]) { 89 max_need_extra += max_row_nnz[i]; 90 } else { 91 need_already += max_row_nnz[i]; 92 n_rows_ahead++; 93 } 94 } 95 96 /* Make maximal and realistic memory requirement estimates */ 97 n_alloc_max = n_alloc_ok + need_already + max_need_extra; 98 n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) * ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max)); 99 100 /* Choose array sizes */ 101 if (n_alloc_max == n_alloc_est) { n_alloc = n_alloc_max; } 102 else if (n_alloc_now >= n_alloc_est) { n_alloc = n_alloc_now; } 103 else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) { n_alloc = n_alloc_max; } 104 else { n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); } 105 106 /* If new estimate is less than what we already have, 107 don't reallocate, just garbage-collect */ 108 if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) { 109 n_alloc = result->n_alloc_icol; 110 } 111 112 /* Motivate dimension choice */ 113 ierr = PetscInfo1(PETSC_NULL," Allocating %d nonzeros: ",n_alloc);CHKERRQ(ierr); 114 if (n_alloc_max == n_alloc_est) { ierr = PetscInfo(PETSC_NULL,"this is the correct size\n");CHKERRQ(ierr); } 115 else if (n_alloc_now >= n_alloc_est) { ierr = PetscInfo(PETSC_NULL,"the current size, which seems enough\n");CHKERRQ(ierr); } 116 else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) { ierr = PetscInfo(PETSC_NULL,"the maximum estimate\n");CHKERRQ(ierr); } 117 else { ierr = PetscInfo1(PETSC_NULL,"%6.2f %% more than the estimate\n",xtra_perc);CHKERRQ(ierr); } 118 119 120 /********************************************************** 121 2. Rescue arrays which would be lost 122 Count how many rows and nonzeros will have to be rescued 123 when moving all arrays in place */ 124 n_row_rescue = 0; n_rescue = 0; 125 if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 126 else { 127 i = *n_row_alloc_ok - 1; 128 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 129 } 130 131 for (i=*n_row_alloc_ok; i<nrows; i++) { 132 i_here = result->icols[i]-result->alloc_icol; 133 i_last = i_here + result->row_nnz[i]; 134 if (result->row_nnz[i]>0) { 135 if (*n_alloc_used > i_here || i_last > n_alloc) { 136 n_rescue += result->row_nnz[i]; 137 n_row_rescue++; 138 } 139 140 if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 141 else { *n_alloc_used += max_row_nnz[i];} 142 } 143 } 144 145 /* Allocate rescue arrays */ 146 ierr = PetscMalloc( n_rescue * sizeof(int), &icol_rescue);CHKERRQ(ierr); 147 ierr = PetscMalloc( n_rescue * sizeof(PetscScalar), &val_rescue);CHKERRQ(ierr); 148 149 /* Rescue the arrays which need rescuing */ 150 n_row_rescue = 0; n_rescue = 0; 151 if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 152 else { 153 i = *n_row_alloc_ok - 1; 154 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 155 } 156 157 for (i=*n_row_alloc_ok; i<nrows; i++) { 158 i_here = result->icols[i]-result->alloc_icol; 159 i_last = i_here + result->row_nnz[i]; 160 if (result->row_nnz[i]>0) { 161 if (*n_alloc_used > i_here || i_last > n_alloc) { 162 ierr = PetscMemcpy((void*) &icol_rescue[n_rescue], (void*) result->icols[i], result->row_nnz[i] * sizeof(int));CHKERRQ(ierr); 163 ierr = PetscMemcpy((void*) &val_rescue[n_rescue], (void*) result->values[i], result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr); 164 n_rescue += result->row_nnz[i]; 165 n_row_rescue++; 166 } 167 168 if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 169 else { *n_alloc_used += max_row_nnz[i];} 170 } 171 } 172 173 /********************************************************** 174 3. Reallocate and correct administration */ 175 176 if (n_alloc != result->n_alloc_icol) { 177 n_copy = PetscMin(n_alloc,result->n_alloc_icol); 178 179 /* PETSC knows no REALLOC, so we'll REALLOC ourselves. 180 181 Allocate new icol-data, copy old contents */ 182 ierr = PetscMalloc( n_alloc * sizeof(int), &result->alloc_icol);CHKERRQ(ierr); 183 ierr = PetscMemcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(int));CHKERRQ(ierr); 184 185 /* Update administration, Reset pointers to new arrays */ 186 result->n_alloc_icol = n_alloc; 187 for (i=0; i<nrows; i++) { 188 result->icols[i] = result->alloc_icol + (result->icols[i] - alloc_icol_old); 189 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 190 } 191 192 /* Delete old array */ 193 ierr = PetscFree(alloc_icol_old);CHKERRQ(ierr); 194 195 /* Allocate new value-data, copy old contents */ 196 ierr = PetscMalloc( n_alloc * sizeof(PetscScalar), &result->alloc_val);CHKERRQ(ierr); 197 ierr = PetscMemcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar));CHKERRQ(ierr); 198 199 /* Update administration, Reset pointers to new arrays */ 200 result->n_alloc_val = n_alloc; 201 for (i=0; i<nrows; i++) { 202 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 203 } 204 205 /* Delete old array */ 206 ierr = PetscFree(alloc_val_old);CHKERRQ(ierr); 207 } 208 209 210 /********************************************************* 211 4. Copy all the arrays to their proper places */ 212 n_row_rescue = 0; n_rescue = 0; 213 if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 214 else { 215 i = *n_row_alloc_ok - 1; 216 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 217 } 218 219 for (i=*n_row_alloc_ok; i<nrows; i++) { 220 i_here = result->icols[i]-result->alloc_icol; 221 i_last = i_here + result->row_nnz[i]; 222 223 result->icols[i] = result->alloc_icol + *n_alloc_used; 224 result->values[i]= result->alloc_val + *n_alloc_used; 225 226 if (result->row_nnz[i]>0) { 227 if (*n_alloc_used > i_here || i_last > n_alloc) { 228 ierr = PetscMemcpy((void*) result->icols[i], (void*) &icol_rescue[n_rescue], result->row_nnz[i] * sizeof(int));CHKERRQ(ierr); 229 ierr = PetscMemcpy((void*) result->values[i], (void*) &val_rescue[n_rescue],result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr); 230 n_rescue += result->row_nnz[i]; 231 n_row_rescue++; 232 } else { 233 for (j=0; j<result->row_nnz[i]; j++) { 234 result->icols[i][j] = result->alloc_icol[i_here+j]; 235 result->values[i][j] = result->alloc_val[ i_here+j]; 236 } 237 } 238 if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 239 else { *n_alloc_used += max_row_nnz[i];} 240 } 241 } 242 243 /* Delete the rescue arrays */ 244 ierr = PetscFree(icol_rescue);CHKERRQ(ierr); 245 ierr = PetscFree(val_rescue);CHKERRQ(ierr); 246 *n_row_alloc_ok = i_row; 247 PetscFunctionReturn(0); 248 } 249 250 /* 251 spbas_incomplete_cholesky: 252 incomplete Cholesky decomposition of a square, symmetric, 253 positive definite matrix. 254 255 In case negative diagonals are encountered, function returns 256 NEGATIVE_DIAGONAL. When this happens, call this function again 257 with a larger epsdiag_in, a less sparse pattern, and/or a smaller 258 droptol 259 */ 260 #undef __FUNCT__ 261 #define __FUNCT__ "spbas_incomplete_cholesky" 262 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L) 263 { 264 PetscInt jL; 265 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 266 PetscInt *ai=a->i,*aj=a->j; 267 MatScalar *aa=a->a; 268 PetscInt nrows, ncols; 269 PetscInt *max_row_nnz; 270 PetscErrorCode ierr; 271 spbas_matrix retval; 272 PetscScalar * diag; 273 PetscScalar * val; 274 PetscScalar * lvec; 275 PetscScalar epsdiag; 276 PetscInt i,j,k; 277 const PetscTruth do_values = PETSC_TRUE; 278 PetscInt * r1_icol; 279 PetscScalar *r1_val; 280 PetscInt * r_icol; 281 PetscInt r_nnz; 282 PetscScalar *r_val; 283 PetscInt * A_icol; 284 PetscInt A_nnz; 285 PetscScalar *A_val; 286 PetscInt * p_icol; 287 PetscInt p_nnz; 288 PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored correctly in the matrix */ 289 PetscInt n_alloc_used = 0; /* part of result->icols and result->values which is currently being used */ 290 291 PetscFunctionBegin; 292 /* Convert the Manteuffel shift from 'fraction of average diagonal' to dimensioned value */ 293 ierr = MatGetSize(A, &nrows, &ncols);CHKERRQ(ierr); 294 ierr = MatGetTrace(A, &epsdiag);CHKERRQ(ierr); 295 epsdiag *= epsdiag_in / nrows; 296 ierr = PetscInfo2(PETSC_NULL," Dimensioned Manteuffel shift %G Drop tolerance %G\n", PetscRealPart(epsdiag),droptol);CHKERRQ(ierr); 297 298 if (droptol<1e-10) {droptol=1e-10;} 299 300 if ( (nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows) ) SETERRQ( PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n"); 301 302 retval.nrows = nrows; 303 retval.ncols = nrows; 304 retval.nnz = pattern.nnz/10; 305 retval.col_idx_type = SPBAS_COLUMN_NUMBERS; 306 retval.block_data = PETSC_TRUE; 307 308 ierr = spbas_allocate_pattern(&retval, do_values);CHKERRQ(ierr); 309 ierr = PetscMemzero((void*) retval.row_nnz, nrows*sizeof(int));CHKERRQ(ierr); 310 ierr = spbas_allocate_data(&retval);CHKERRQ(ierr); 311 retval.nnz = 0; 312 313 ierr = PetscMalloc(nrows*sizeof(PetscScalar), &diag);CHKERRQ(ierr); 314 ierr = PetscMalloc(nrows*sizeof(PetscScalar), &val);CHKERRQ(ierr); 315 ierr = PetscMalloc(nrows*sizeof(PetscScalar), &lvec);CHKERRQ(ierr); 316 ierr = PetscMalloc(nrows*sizeof(int), &max_row_nnz);CHKERRQ(ierr); 317 if (!(diag && val && lvec && max_row_nnz)) SETERRQ( PETSC_ERR_MEM, "Allocation error in spbas_incomplete_cholesky\n"); 318 319 ierr = PetscMemzero((void*) val, nrows*sizeof(PetscScalar));CHKERRQ(ierr); 320 ierr = PetscMemzero((void*) lvec, nrows*sizeof(PetscScalar));CHKERRQ(ierr); 321 ierr = PetscMemzero((void*) max_row_nnz, nrows*sizeof(int));CHKERRQ(ierr); 322 323 /* Check correct format of sparseness pattern */ 324 if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ( PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n"); 325 326 /* Count the nonzeros on transpose of pattern */ 327 for (i = 0; i<nrows; i++) { 328 p_nnz = pattern.row_nnz[i]; 329 p_icol = pattern.icols[i]; 330 for (j=0; j<p_nnz; j++) { 331 max_row_nnz[i+p_icol[j]]++; 332 } 333 } 334 335 /* Calculate rows of L */ 336 for (i = 0; i<nrows; i++) { 337 p_nnz = pattern.row_nnz[i]; 338 p_icol = pattern.icols[i]; 339 340 r_nnz = retval.row_nnz[i]; 341 r_icol = retval.icols[i]; 342 r_val = retval.values[i]; 343 A_nnz = ai[rip[i]+1] - ai[rip[i]]; 344 A_icol = &aj[ai[rip[i]]]; 345 A_val = &aa[ai[rip[i]]]; 346 347 /* Calculate val += A(i,i:n)'; */ 348 for (j=0; j<A_nnz; j++) { 349 k = riip[A_icol[j]]; 350 if (k>=i) { val[k] = A_val[j]; } 351 } 352 353 /* Add regularization */ 354 val[i] += epsdiag; 355 356 /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i); 357 val(i) = A(i,i) - L(0:i-1,i)' * lvec */ 358 for ( j=0; j<r_nnz; j++) { 359 k = r_icol[j]; 360 lvec[k] = diag[k] * r_val[j]; 361 val[i] -= r_val[j] * lvec[k]; 362 } 363 364 /* Calculate the new diagonal */ 365 diag[i] = val[i]; 366 if (PetscRealPart(diag[i])<droptol) { 367 ierr = PetscInfo(PETSC_NULL,"Error in spbas_incomplete_cholesky:\n"); 368 ierr = PetscInfo1(PETSC_NULL,"Negative diagonal in row %d\n",i+1); 369 370 /* Delete the whole matrix at once. */ 371 ierr = spbas_delete(retval); 372 return NEGATIVE_DIAGONAL; 373 } 374 375 /* If necessary, allocate arrays */ 376 if (r_nnz==0) { 377 ierr = spbas_cholesky_row_alloc( retval, i, 1, &n_alloc_used);CHKERRQ(ierr); 378 if (ierr == PETSC_ERR_MEM) { 379 ierr = spbas_cholesky_garbage_collect( &retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 380 ierr = spbas_cholesky_row_alloc( retval, i, 1, &n_alloc_used); 381 } 382 r_icol = retval.icols[i]; 383 r_val = retval.values[i]; 384 } 385 386 /* Now, fill in */ 387 r_icol[r_nnz] = i; 388 r_val [r_nnz] = 1.0; 389 r_nnz++; 390 retval.row_nnz[i]++; 391 392 retval.nnz += r_nnz; 393 394 /* Calculate 395 val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */ 396 for (j=1; j<p_nnz; j++) { 397 k = i+p_icol[j]; 398 r1_icol = retval.icols[k]; 399 r1_val = retval.values[k]; 400 for (jL=0; jL<retval.row_nnz[k]; jL++) { 401 val[k] -= r1_val[jL] * lvec[r1_icol[jL]]; 402 } 403 val[k] /= diag[i]; 404 405 if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) { 406 /* If necessary, allocate arrays */ 407 if (retval.row_nnz[k]==0) { 408 ierr = spbas_cholesky_row_alloc( retval, k, max_row_nnz[k], &n_alloc_used); 409 if (ierr == PETSC_ERR_MEM) { 410 ierr = spbas_cholesky_garbage_collect( &retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 411 ierr = spbas_cholesky_row_alloc( retval, k, max_row_nnz[k], &n_alloc_used);CHKERRQ(ierr); 412 r_icol = retval.icols[i]; 413 r_val = retval.values[i]; 414 } 415 } 416 417 retval.icols[k][retval.row_nnz[k]] = i; 418 retval.values[k][retval.row_nnz[k]] = val[k]; 419 retval.row_nnz[k]++; 420 } 421 val[k] = 0; 422 } 423 424 /*Erase the values used in the work arrays */ 425 for (j=0; j<r_nnz; j++) { lvec[r_icol[j]] = 0; } 426 } 427 428 ierr=PetscFree(lvec);CHKERRQ(ierr); 429 ierr=PetscFree(val);CHKERRQ(ierr); 430 431 ierr = spbas_cholesky_garbage_collect( &retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 432 ierr=PetscFree(max_row_nnz);CHKERRQ(ierr); 433 434 /* Place the inverse of the diagonals in the matrix */ 435 for (i=0; i<nrows; i++) { 436 r_nnz = retval.row_nnz[i]; 437 retval.values[i][r_nnz-1] = 1.0 / diag[i]; 438 for (j=0; j<r_nnz-1; j++) { 439 retval.values[i][j] *= -1; 440 } 441 } 442 ierr=PetscFree(diag);CHKERRQ(ierr); 443 *matrix_L = retval; 444 PetscFunctionReturn(0); 445 } 446 447