1 #define PETSCMAT_DLL 2 3 /* 4 Factorization code for BAIJ format. 5 */ 6 #include "src/mat/impls/baij/seq/baij.h" 7 #include "src/inline/ilu.h" 8 9 /* ------------------------------------------------------------*/ 10 /* 11 Version for when blocks are 2 by 2 12 */ 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2" 15 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat A,MatFactorInfo *info,Mat *B) 16 { 17 Mat C = *B; 18 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 19 IS isrow = b->row,isicol = b->icol; 20 PetscErrorCode ierr; 21 PetscInt *r,*ic,i,j,n = a->mbs,*bi = b->i,*bj = b->j; 22 PetscInt *ajtmpold,*ajtmp,nz,row; 23 PetscInt *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj; 24 MatScalar *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4; 25 MatScalar p1,p2,p3,p4; 26 MatScalar *ba = b->a,*aa = a->a; 27 PetscReal shift = info->shiftinblocks; 28 29 PetscFunctionBegin; 30 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 31 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 32 ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 33 34 for (i=0; i<n; i++) { 35 nz = bi[i+1] - bi[i]; 36 ajtmp = bj + bi[i]; 37 for (j=0; j<nz; j++) { 38 x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0; 39 } 40 /* load in initial (unfactored row) */ 41 idx = r[i]; 42 nz = ai[idx+1] - ai[idx]; 43 ajtmpold = aj + ai[idx]; 44 v = aa + 4*ai[idx]; 45 for (j=0; j<nz; j++) { 46 x = rtmp+4*ic[ajtmpold[j]]; 47 x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 48 v += 4; 49 } 50 row = *ajtmp++; 51 while (row < i) { 52 pc = rtmp + 4*row; 53 p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 54 if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) { 55 pv = ba + 4*diag_offset[row]; 56 pj = bj + diag_offset[row] + 1; 57 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 58 pc[0] = m1 = p1*x1 + p3*x2; 59 pc[1] = m2 = p2*x1 + p4*x2; 60 pc[2] = m3 = p1*x3 + p3*x4; 61 pc[3] = m4 = p2*x3 + p4*x4; 62 nz = bi[row+1] - diag_offset[row] - 1; 63 pv += 4; 64 for (j=0; j<nz; j++) { 65 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 66 x = rtmp + 4*pj[j]; 67 x[0] -= m1*x1 + m3*x2; 68 x[1] -= m2*x1 + m4*x2; 69 x[2] -= m1*x3 + m3*x4; 70 x[3] -= m2*x3 + m4*x4; 71 pv += 4; 72 } 73 ierr = PetscLogFlops(16*nz+12);CHKERRQ(ierr); 74 } 75 row = *ajtmp++; 76 } 77 /* finished row so stick it into b->a */ 78 pv = ba + 4*bi[i]; 79 pj = bj + bi[i]; 80 nz = bi[i+1] - bi[i]; 81 for (j=0; j<nz; j++) { 82 x = rtmp+4*pj[j]; 83 pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 84 pv += 4; 85 } 86 /* invert diagonal block */ 87 w = ba + 4*diag_offset[i]; 88 ierr = Kernel_A_gets_inverse_A_2(w,shift);CHKERRQ(ierr); 89 } 90 91 ierr = PetscFree(rtmp);CHKERRQ(ierr); 92 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 93 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 94 C->ops->solve = MatSolve_SeqBAIJ_2; 95 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2; 96 C->factor = MAT_FACTOR_LU; 97 C->assembled = PETSC_TRUE; 98 ierr = PetscLogFlops(1.3333*8*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 99 PetscFunctionReturn(0); 100 } 101 /* 102 Version for when blocks are 2 by 2 Using natural ordering 103 */ 104 #undef __FUNCT__ 105 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering" 106 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat A,MatFactorInfo *info,Mat *B) 107 { 108 Mat C = *B; 109 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 110 PetscErrorCode ierr; 111 PetscInt i,j,n = a->mbs,*bi = b->i,*bj = b->j; 112 PetscInt *ajtmpold,*ajtmp,nz,row; 113 PetscInt *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj; 114 MatScalar *pv,*v,*rtmp,*pc,*w,*x; 115 MatScalar p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4; 116 MatScalar *ba = b->a,*aa = a->a; 117 PetscReal shift = info->shiftinblocks; 118 119 PetscFunctionBegin; 120 ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 121 122 for (i=0; i<n; i++) { 123 nz = bi[i+1] - bi[i]; 124 ajtmp = bj + bi[i]; 125 for (j=0; j<nz; j++) { 126 x = rtmp+4*ajtmp[j]; 127 x[0] = x[1] = x[2] = x[3] = 0.0; 128 } 129 /* load in initial (unfactored row) */ 130 nz = ai[i+1] - ai[i]; 131 ajtmpold = aj + ai[i]; 132 v = aa + 4*ai[i]; 133 for (j=0; j<nz; j++) { 134 x = rtmp+4*ajtmpold[j]; 135 x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 136 v += 4; 137 } 138 row = *ajtmp++; 139 while (row < i) { 140 pc = rtmp + 4*row; 141 p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 142 if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) { 143 pv = ba + 4*diag_offset[row]; 144 pj = bj + diag_offset[row] + 1; 145 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 146 pc[0] = m1 = p1*x1 + p3*x2; 147 pc[1] = m2 = p2*x1 + p4*x2; 148 pc[2] = m3 = p1*x3 + p3*x4; 149 pc[3] = m4 = p2*x3 + p4*x4; 150 nz = bi[row+1] - diag_offset[row] - 1; 151 pv += 4; 152 for (j=0; j<nz; j++) { 153 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 154 x = rtmp + 4*pj[j]; 155 x[0] -= m1*x1 + m3*x2; 156 x[1] -= m2*x1 + m4*x2; 157 x[2] -= m1*x3 + m3*x4; 158 x[3] -= m2*x3 + m4*x4; 159 pv += 4; 160 } 161 ierr = PetscLogFlops(16*nz+12);CHKERRQ(ierr); 162 } 163 row = *ajtmp++; 164 } 165 /* finished row so stick it into b->a */ 166 pv = ba + 4*bi[i]; 167 pj = bj + bi[i]; 168 nz = bi[i+1] - bi[i]; 169 for (j=0; j<nz; j++) { 170 x = rtmp+4*pj[j]; 171 pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 172 pv += 4; 173 } 174 /* invert diagonal block */ 175 w = ba + 4*diag_offset[i]; 176 ierr = Kernel_A_gets_inverse_A_2(w,shift);CHKERRQ(ierr); 177 } 178 179 ierr = PetscFree(rtmp);CHKERRQ(ierr); 180 C->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering; 181 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering; 182 C->assembled = PETSC_TRUE; 183 ierr = PetscLogFlops(1.3333*8*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 184 PetscFunctionReturn(0); 185 } 186 187 /* ----------------------------------------------------------- */ 188 /* 189 Version for when blocks are 1 by 1. 190 */ 191 #undef __FUNCT__ 192 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_1" 193 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat A,MatFactorInfo *info,Mat *B) 194 { 195 Mat C = *B; 196 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 197 IS isrow = b->row,isicol = b->icol; 198 PetscErrorCode ierr; 199 PetscInt *r,*ic,i,j,n = a->mbs,*bi = b->i,*bj = b->j; 200 PetscInt *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j; 201 PetscInt *diag_offset = b->diag,diag,*pj; 202 MatScalar *pv,*v,*rtmp,multiplier,*pc; 203 MatScalar *ba = b->a,*aa = a->a; 204 205 PetscFunctionBegin; 206 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 207 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 208 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 209 210 for (i=0; i<n; i++) { 211 nz = bi[i+1] - bi[i]; 212 ajtmp = bj + bi[i]; 213 for (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0; 214 215 /* load in initial (unfactored row) */ 216 nz = ai[r[i]+1] - ai[r[i]]; 217 ajtmpold = aj + ai[r[i]]; 218 v = aa + ai[r[i]]; 219 for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] = v[j]; 220 221 row = *ajtmp++; 222 while (row < i) { 223 pc = rtmp + row; 224 if (*pc != 0.0) { 225 pv = ba + diag_offset[row]; 226 pj = bj + diag_offset[row] + 1; 227 multiplier = *pc * *pv++; 228 *pc = multiplier; 229 nz = bi[row+1] - diag_offset[row] - 1; 230 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 231 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 232 } 233 row = *ajtmp++; 234 } 235 /* finished row so stick it into b->a */ 236 pv = ba + bi[i]; 237 pj = bj + bi[i]; 238 nz = bi[i+1] - bi[i]; 239 for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];} 240 diag = diag_offset[i] - bi[i]; 241 /* check pivot entry for current row */ 242 if (pv[diag] == 0.0) { 243 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 244 } 245 pv[diag] = 1.0/pv[diag]; 246 } 247 248 ierr = PetscFree(rtmp);CHKERRQ(ierr); 249 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 250 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 251 C->ops->solve = MatSolve_SeqBAIJ_1; 252 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1; 253 C->assembled = PETSC_TRUE; 254 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 255 PetscFunctionReturn(0); 256 } 257 258 EXTERN_C_BEGIN 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatGetFactor_seqbaij_petsc" 261 PetscErrorCode MatGetFactor_seqbaij_petsc(Mat A,MatFactorType ftype,Mat *B) 262 { 263 PetscInt n = A->rmap->n; 264 PetscErrorCode ierr; 265 266 PetscFunctionBegin; 267 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 268 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 269 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 270 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 271 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqBAIJ; 272 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqBAIJ; 273 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 274 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 275 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 276 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqBAIJ; 277 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqBAIJ; 278 } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 279 PetscFunctionReturn(0); 280 } 281 EXTERN_C_END 282 283 EXTERN_C_BEGIN 284 #undef __FUNCT__ 285 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 286 PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg) 287 { 288 PetscFunctionBegin; 289 *flg = PETSC_TRUE; 290 PetscFunctionReturn(0); 291 } 292 EXTERN_C_END 293 294 /* ----------------------------------------------------------- */ 295 #undef __FUNCT__ 296 #define __FUNCT__ "MatLUFactor_SeqBAIJ" 297 PetscErrorCode MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 298 { 299 PetscErrorCode ierr; 300 Mat C; 301 302 PetscFunctionBegin; 303 ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 304 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 305 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 306 A->ops->solve = C->ops->solve; 307 A->ops->solvetranspose = C->ops->solvetranspose; 308 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 309 ierr = PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol);CHKERRQ(ierr); 310 PetscFunctionReturn(0); 311 } 312 313 #include "src/mat/impls/sbaij/seq/sbaij.h" 314 #undef __FUNCT__ 315 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqBAIJ_N" 316 PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N(Mat A,MatFactorInfo *info,Mat *B) 317 { 318 PetscErrorCode ierr; 319 Mat C = *B; 320 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 321 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 322 IS ip=b->row; 323 PetscInt *rip,i,j,mbs=a->mbs,bs=A->rmap->bs,*bi=b->i,*bj=b->j,*bcol; 324 PetscInt *ai=a->i,*aj=a->j; 325 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 326 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 327 PetscReal zeropivot,rs,shiftnz; 328 PetscReal shiftpd; 329 ChShift_Ctx sctx; 330 PetscInt newshift; 331 332 PetscFunctionBegin; 333 if (bs > 1) { 334 if (!a->sbaijMat){ 335 ierr = MatConvert(A,MATSEQSBAIJ,MAT_INITIAL_MATRIX,&a->sbaijMat);CHKERRQ(ierr); 336 } 337 ierr = (a->sbaijMat)->ops->choleskyfactornumeric(a->sbaijMat,info,B);CHKERRQ(ierr); 338 ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr); 339 a->sbaijMat = PETSC_NULL; 340 PetscFunctionReturn(0); 341 } 342 343 /* initialization */ 344 shiftnz = info->shiftnz; 345 shiftpd = info->shiftpd; 346 zeropivot = info->zeropivot; 347 348 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 349 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 350 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 351 jl = il + mbs; 352 rtmp = (MatScalar*)(jl + mbs); 353 354 sctx.shift_amount = 0; 355 sctx.nshift = 0; 356 do { 357 sctx.chshift = PETSC_FALSE; 358 for (i=0; i<mbs; i++) { 359 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 360 } 361 362 for (k = 0; k<mbs; k++){ 363 bval = ba + bi[k]; 364 /* initialize k-th row by the perm[k]-th row of A */ 365 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 366 for (j = jmin; j < jmax; j++){ 367 col = rip[aj[j]]; 368 if (col >= k){ /* only take upper triangular entry */ 369 rtmp[col] = aa[j]; 370 *bval++ = 0.0; /* for in-place factorization */ 371 } 372 } 373 374 /* shift the diagonal of the matrix */ 375 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 376 377 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 378 dk = rtmp[k]; 379 i = jl[k]; /* first row to be added to k_th row */ 380 381 while (i < k){ 382 nexti = jl[i]; /* next row to be added to k_th row */ 383 384 /* compute multiplier, update diag(k) and U(i,k) */ 385 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 386 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 387 dk += uikdi*ba[ili]; 388 ba[ili] = uikdi; /* -U(i,k) */ 389 390 /* add multiple of row i to k-th row */ 391 jmin = ili + 1; jmax = bi[i+1]; 392 if (jmin < jmax){ 393 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 394 /* update il and jl for row i */ 395 il[i] = jmin; 396 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 397 } 398 i = nexti; 399 } 400 401 /* shift the diagonals when zero pivot is detected */ 402 /* compute rs=sum of abs(off-diagonal) */ 403 rs = 0.0; 404 jmin = bi[k]+1; 405 nz = bi[k+1] - jmin; 406 if (nz){ 407 bcol = bj + jmin; 408 while (nz--){ 409 rs += PetscAbsScalar(rtmp[*bcol]); 410 bcol++; 411 } 412 } 413 414 sctx.rs = rs; 415 sctx.pv = dk; 416 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 417 if (newshift == 1) break; 418 419 /* copy data into U(k,:) */ 420 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 421 jmin = bi[k]+1; jmax = bi[k+1]; 422 if (jmin < jmax) { 423 for (j=jmin; j<jmax; j++){ 424 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 425 } 426 /* add the k-th row into il and jl */ 427 il[k] = jmin; 428 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 429 } 430 } 431 } while (sctx.chshift); 432 ierr = PetscFree(il);CHKERRQ(ierr); 433 434 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 435 C->factor = MAT_FACTOR_CHOLESKY; 436 C->assembled = PETSC_TRUE; 437 C->preallocated = PETSC_TRUE; 438 ierr = PetscLogFlops(C->rmap->N);CHKERRQ(ierr); 439 if (sctx.nshift){ 440 if (shiftnz) { 441 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 442 } else if (shiftpd) { 443 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 444 } 445 } 446 PetscFunctionReturn(0); 447 } 448 449 #undef __FUNCT__ 450 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering" 451 PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat A,MatFactorInfo *info,Mat *fact) 452 { 453 Mat C = *fact; 454 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 455 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 456 PetscErrorCode ierr; 457 PetscInt i,j,am=a->mbs; 458 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 459 PetscInt k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz; 460 MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval; 461 PetscReal zeropivot,rs,shiftnz; 462 PetscReal shiftpd; 463 ChShift_Ctx sctx; 464 PetscInt newshift; 465 466 PetscFunctionBegin; 467 /* initialization */ 468 shiftnz = info->shiftnz; 469 shiftpd = info->shiftpd; 470 zeropivot = info->zeropivot; 471 472 nz = (2*am+1)*sizeof(PetscInt)+am*sizeof(MatScalar); 473 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 474 jl = il + am; 475 rtmp = (MatScalar*)(jl + am); 476 477 sctx.shift_amount = 0; 478 sctx.nshift = 0; 479 do { 480 sctx.chshift = PETSC_FALSE; 481 for (i=0; i<am; i++) { 482 rtmp[i] = 0.0; jl[i] = am; il[0] = 0; 483 } 484 485 for (k = 0; k<am; k++){ 486 /* initialize k-th row with elements nonzero in row perm(k) of A */ 487 nz = ai[k+1] - ai[k]; 488 acol = aj + ai[k]; 489 aval = aa + ai[k]; 490 bval = ba + bi[k]; 491 while (nz -- ){ 492 if (*acol < k) { /* skip lower triangular entries */ 493 acol++; aval++; 494 } else { 495 rtmp[*acol++] = *aval++; 496 *bval++ = 0.0; /* for in-place factorization */ 497 } 498 } 499 500 /* shift the diagonal of the matrix */ 501 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 502 503 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 504 dk = rtmp[k]; 505 i = jl[k]; /* first row to be added to k_th row */ 506 507 while (i < k){ 508 nexti = jl[i]; /* next row to be added to k_th row */ 509 /* compute multiplier, update D(k) and U(i,k) */ 510 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 511 uikdi = - ba[ili]*ba[bi[i]]; 512 dk += uikdi*ba[ili]; 513 ba[ili] = uikdi; /* -U(i,k) */ 514 515 /* add multiple of row i to k-th row ... */ 516 jmin = ili + 1; 517 nz = bi[i+1] - jmin; 518 if (nz > 0){ 519 bcol = bj + jmin; 520 bval = ba + jmin; 521 while (nz --) rtmp[*bcol++] += uikdi*(*bval++); 522 /* update il and jl for i-th row */ 523 il[i] = jmin; 524 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 525 } 526 i = nexti; 527 } 528 529 /* shift the diagonals when zero pivot is detected */ 530 /* compute rs=sum of abs(off-diagonal) */ 531 rs = 0.0; 532 jmin = bi[k]+1; 533 nz = bi[k+1] - jmin; 534 if (nz){ 535 bcol = bj + jmin; 536 while (nz--){ 537 rs += PetscAbsScalar(rtmp[*bcol]); 538 bcol++; 539 } 540 } 541 542 sctx.rs = rs; 543 sctx.pv = dk; 544 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 545 if (newshift == 1) break; /* sctx.shift_amount is updated */ 546 547 /* copy data into U(k,:) */ 548 ba[bi[k]] = 1.0/dk; 549 jmin = bi[k]+1; 550 nz = bi[k+1] - jmin; 551 if (nz){ 552 bcol = bj + jmin; 553 bval = ba + jmin; 554 while (nz--){ 555 *bval++ = rtmp[*bcol]; 556 rtmp[*bcol++] = 0.0; 557 } 558 /* add k-th row into il and jl */ 559 il[k] = jmin; 560 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 561 } 562 } 563 } while (sctx.chshift); 564 ierr = PetscFree(il);CHKERRQ(ierr); 565 566 C->factor = MAT_FACTOR_CHOLESKY; 567 C->assembled = PETSC_TRUE; 568 C->preallocated = PETSC_TRUE; 569 ierr = PetscLogFlops(C->rmap->N);CHKERRQ(ierr); 570 if (sctx.nshift){ 571 if (shiftnz) { 572 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 573 } else if (shiftpd) { 574 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 575 } 576 } 577 PetscFunctionReturn(0); 578 } 579 580 #include "petscbt.h" 581 #include "src/mat/utils/freespace.h" 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatICCFactorSymbolic_SeqBAIJ" 584 PetscErrorCode MatICCFactorSymbolic_SeqBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 585 { 586 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 587 Mat_SeqSBAIJ *b; 588 Mat B; 589 PetscErrorCode ierr; 590 PetscTruth perm_identity; 591 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=a->mbs,bs=A->rmap->bs,*ui; 592 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 593 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 594 PetscReal fill=info->fill,levels=info->levels; 595 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 596 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 597 PetscBT lnkbt; 598 599 PetscFunctionBegin; 600 if (bs > 1){ 601 if (!a->sbaijMat){ 602 ierr = MatConvert(A,MATSEQSBAIJ,MAT_INITIAL_MATRIX,&a->sbaijMat);CHKERRQ(ierr); 603 } 604 (*fact)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqSBAIJ; /* undue the change made in MatGetFactor_seqbaij_petsc */ 605 ierr = MatICCFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr); 606 PetscFunctionReturn(0); 607 } 608 609 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 610 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 611 612 /* special case that simply copies fill pattern */ 613 if (!levels && perm_identity) { 614 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 615 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 616 for (i=0; i<am; i++) { 617 ui[i] = ai[i+1] - a->diag[i]; /* ui: rowlengths - changes when !perm_identity */ 618 } 619 B = *fact; 620 ierr = MatSeqSBAIJSetPreallocation(B,1,0,ui);CHKERRQ(ierr); 621 622 623 b = (Mat_SeqSBAIJ*)B->data; 624 uj = b->j; 625 for (i=0; i<am; i++) { 626 aj = a->j + a->diag[i]; 627 for (j=0; j<ui[i]; j++){ 628 *uj++ = *aj++; 629 } 630 b->ilen[i] = ui[i]; 631 } 632 ierr = PetscFree(ui);CHKERRQ(ierr); 633 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 634 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 635 636 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 637 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 638 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 639 PetscFunctionReturn(0); 640 } 641 642 /* initialization */ 643 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 644 ui[0] = 0; 645 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt),&cols_lvl);CHKERRQ(ierr); 646 647 /* jl: linked list for storing indices of the pivot rows 648 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 649 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 650 il = jl + am; 651 uj_ptr = (PetscInt**)(il + am); 652 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 653 for (i=0; i<am; i++){ 654 jl[i] = am; il[i] = 0; 655 } 656 657 /* create and initialize a linked list for storing column indices of the active row k */ 658 nlnk = am + 1; 659 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 660 661 /* initial FreeSpace size is fill*(ai[am]+1) */ 662 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 663 current_space = free_space; 664 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 665 current_space_lvl = free_space_lvl; 666 667 for (k=0; k<am; k++){ /* for each active row k */ 668 /* initialize lnk by the column indices of row rip[k] of A */ 669 nzk = 0; 670 ncols = ai[rip[k]+1] - ai[rip[k]]; 671 ncols_upper = 0; 672 cols = cols_lvl + am; 673 for (j=0; j<ncols; j++){ 674 i = rip[*(aj + ai[rip[k]] + j)]; 675 if (i >= k){ /* only take upper triangular entry */ 676 cols[ncols_upper] = i; 677 cols_lvl[ncols_upper] = -1; /* initialize level for nonzero entries */ 678 ncols_upper++; 679 } 680 } 681 ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 682 nzk += nlnk; 683 684 /* update lnk by computing fill-in for each pivot row to be merged in */ 685 prow = jl[k]; /* 1st pivot row */ 686 687 while (prow < k){ 688 nextprow = jl[prow]; 689 690 /* merge prow into k-th row */ 691 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 692 jmax = ui[prow+1]; 693 ncols = jmax-jmin; 694 i = jmin - ui[prow]; 695 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 696 for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j); 697 ierr = PetscIncompleteLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 698 nzk += nlnk; 699 700 /* update il and jl for prow */ 701 if (jmin < jmax){ 702 il[prow] = jmin; 703 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 704 } 705 prow = nextprow; 706 } 707 708 /* if free space is not available, make more free space */ 709 if (current_space->local_remaining<nzk) { 710 i = am - k + 1; /* num of unfactored rows */ 711 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 712 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 713 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 714 reallocs++; 715 } 716 717 /* copy data into free_space and free_space_lvl, then initialize lnk */ 718 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 719 720 /* add the k-th row into il and jl */ 721 if (nzk-1 > 0){ 722 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 723 jl[k] = jl[i]; jl[i] = k; 724 il[k] = ui[k] + 1; 725 } 726 uj_ptr[k] = current_space->array; 727 uj_lvl_ptr[k] = current_space_lvl->array; 728 729 current_space->array += nzk; 730 current_space->local_used += nzk; 731 current_space->local_remaining -= nzk; 732 733 current_space_lvl->array += nzk; 734 current_space_lvl->local_used += nzk; 735 current_space_lvl->local_remaining -= nzk; 736 737 ui[k+1] = ui[k] + nzk; 738 } 739 740 #if defined(PETSC_USE_INFO) 741 if (ai[am] != 0) { 742 PetscReal af = ((PetscReal)(2*ui[am]-am))/((PetscReal)ai[am]); 743 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 744 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 745 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 746 } else { 747 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 748 } 749 #endif 750 751 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 752 ierr = PetscFree(jl);CHKERRQ(ierr); 753 ierr = PetscFree(cols_lvl);CHKERRQ(ierr); 754 755 /* destroy list of free space and other temporary array(s) */ 756 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 757 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 758 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 759 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 760 761 /* put together the new matrix in MATSEQSBAIJ format */ 762 B = *fact; 763 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 764 765 b = (Mat_SeqSBAIJ*)B->data; 766 b->singlemalloc = PETSC_FALSE; 767 b->free_a = PETSC_TRUE; 768 b->free_ij = PETSC_TRUE; 769 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 770 b->j = uj; 771 b->i = ui; 772 b->diag = 0; 773 b->ilen = 0; 774 b->imax = 0; 775 b->row = perm; 776 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 777 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 778 b->icol = perm; 779 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 780 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 781 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 782 b->maxnz = b->nz = ui[am]; 783 784 B->factor = MAT_FACTOR_CHOLESKY; 785 B->info.factor_mallocs = reallocs; 786 B->info.fill_ratio_given = fill; 787 if (ai[am] != 0) { 788 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 789 } else { 790 B->info.fill_ratio_needed = 0.0; 791 } 792 if (perm_identity){ 793 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 794 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 795 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 796 } else { 797 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N; 798 } 799 PetscFunctionReturn(0); 800 } 801 802 #undef __FUNCT__ 803 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqBAIJ" 804 PetscErrorCode MatCholeskyFactorSymbolic_SeqBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 805 { 806 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 807 Mat_SeqSBAIJ *b; 808 Mat B; 809 PetscErrorCode ierr; 810 PetscTruth perm_identity; 811 PetscReal fill = info->fill; 812 PetscInt *rip,i,mbs=a->mbs,bs=A->rmap->bs,*ai=a->i,*aj=a->j,reallocs=0,prow; 813 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 814 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 815 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 816 PetscBT lnkbt; 817 818 PetscFunctionBegin; 819 if (bs > 1) { /* convert to seqsbaij */ 820 if (!a->sbaijMat){ 821 ierr = MatConvert(A,MATSEQSBAIJ,MAT_INITIAL_MATRIX,&a->sbaijMat);CHKERRQ(ierr); 822 } 823 (*fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqSBAIJ; /* undue the change made in MatGetFactor_seqbaij_petsc */ 824 ierr = MatCholeskyFactorSymbolic(a->sbaijMat,perm,info,fact);CHKERRQ(ierr); 825 PetscFunctionReturn(0); 826 } 827 828 /* check whether perm is the identity mapping */ 829 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 830 if (!perm_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported"); 831 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 832 833 /* initialization */ 834 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 835 ui[0] = 0; 836 837 /* jl: linked list for storing indices of the pivot rows 838 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 839 ierr = PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);CHKERRQ(ierr); 840 il = jl + mbs; 841 cols = il + mbs; 842 ui_ptr = (PetscInt**)(cols + mbs); 843 for (i=0; i<mbs; i++){ 844 jl[i] = mbs; il[i] = 0; 845 } 846 847 /* create and initialize a linked list for storing column indices of the active row k */ 848 nlnk = mbs + 1; 849 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 850 851 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 852 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[mbs]+1)),&free_space);CHKERRQ(ierr); 853 current_space = free_space; 854 855 for (k=0; k<mbs; k++){ /* for each active row k */ 856 /* initialize lnk by the column indices of row rip[k] of A */ 857 nzk = 0; 858 ncols = ai[rip[k]+1] - ai[rip[k]]; 859 ncols_upper = 0; 860 for (j=0; j<ncols; j++){ 861 i = rip[*(aj + ai[rip[k]] + j)]; 862 if (i >= k){ /* only take upper triangular entry */ 863 cols[ncols_upper] = i; 864 ncols_upper++; 865 } 866 } 867 ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 868 nzk += nlnk; 869 870 /* update lnk by computing fill-in for each pivot row to be merged in */ 871 prow = jl[k]; /* 1st pivot row */ 872 873 while (prow < k){ 874 nextprow = jl[prow]; 875 /* merge prow into k-th row */ 876 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 877 jmax = ui[prow+1]; 878 ncols = jmax-jmin; 879 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 880 ierr = PetscLLAddSorted(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 881 nzk += nlnk; 882 883 /* update il and jl for prow */ 884 if (jmin < jmax){ 885 il[prow] = jmin; 886 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 887 } 888 prow = nextprow; 889 } 890 891 /* if free space is not available, make more free space */ 892 if (current_space->local_remaining<nzk) { 893 i = mbs - k + 1; /* num of unfactored rows */ 894 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 895 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 896 reallocs++; 897 } 898 899 /* copy data into free space, then initialize lnk */ 900 ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 901 902 /* add the k-th row into il and jl */ 903 if (nzk-1 > 0){ 904 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 905 jl[k] = jl[i]; jl[i] = k; 906 il[k] = ui[k] + 1; 907 } 908 ui_ptr[k] = current_space->array; 909 current_space->array += nzk; 910 current_space->local_used += nzk; 911 current_space->local_remaining -= nzk; 912 913 ui[k+1] = ui[k] + nzk; 914 } 915 916 #if defined(PETSC_USE_INFO) 917 if (ai[mbs] != 0) { 918 PetscReal af = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 919 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 920 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 921 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 922 } else { 923 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 924 } 925 #endif 926 927 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 928 ierr = PetscFree(jl);CHKERRQ(ierr); 929 930 /* destroy list of free space and other temporary array(s) */ 931 ierr = PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 932 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 933 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 934 935 /* put together the new matrix in MATSEQSBAIJ format */ 936 B = *fact; 937 ierr = MatSeqSBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 938 939 b = (Mat_SeqSBAIJ*)B->data; 940 b->singlemalloc = PETSC_FALSE; 941 b->free_a = PETSC_TRUE; 942 b->free_ij = PETSC_TRUE; 943 ierr = PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 944 b->j = uj; 945 b->i = ui; 946 b->diag = 0; 947 b->ilen = 0; 948 b->imax = 0; 949 b->row = perm; 950 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 951 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 952 b->icol = perm; 953 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 954 ierr = PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 955 ierr = PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 956 b->maxnz = b->nz = ui[mbs]; 957 958 B->factor = MAT_FACTOR_CHOLESKY; 959 B->info.factor_mallocs = reallocs; 960 B->info.fill_ratio_given = fill; 961 if (ai[mbs] != 0) { 962 B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]); 963 } else { 964 B->info.fill_ratio_needed = 0.0; 965 } 966 if (perm_identity){ 967 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 968 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 969 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 970 } else { 971 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N; 972 } 973 PetscFunctionReturn(0); 974 } 975