1 2 /* 3 Factorization code for BAIJ format. 4 */ 5 #include <../src/mat/impls/baij/seq/baij.h> 6 #include <petsc/private/kernels/blockinvert.h> 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2" 10 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat B,Mat A,const MatFactorInfo *info) 11 { 12 Mat C =B; 13 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)C->data; 14 IS isrow = b->row,isicol = b->icol; 15 PetscErrorCode ierr; 16 const PetscInt *r,*ic; 17 PetscInt i,j,k,nz,nzL,row,*pj; 18 const PetscInt n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,bs2=a->bs2; 19 const PetscInt *ajtmp,*bjtmp,*bdiag=b->diag; 20 MatScalar *rtmp,*pc,*mwork,*pv; 21 MatScalar *aa=a->a,*v; 22 PetscInt flg; 23 PetscReal shift = info->shiftamount; 24 PetscBool allowzeropivot,zeropivotdetected; 25 26 PetscFunctionBegin; 27 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 28 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 29 allowzeropivot = PetscNot(A->erroriffailure); 30 31 /* generate work space needed by the factorization */ 32 ierr = PetscMalloc2(bs2*n,&rtmp,bs2,&mwork);CHKERRQ(ierr); 33 ierr = PetscMemzero(rtmp,bs2*n*sizeof(MatScalar));CHKERRQ(ierr); 34 35 for (i=0; i<n; i++) { 36 /* zero rtmp */ 37 /* L part */ 38 nz = bi[i+1] - bi[i]; 39 bjtmp = bj + bi[i]; 40 for (j=0; j<nz; j++) { 41 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 42 } 43 44 /* U part */ 45 nz = bdiag[i] - bdiag[i+1]; 46 bjtmp = bj + bdiag[i+1]+1; 47 for (j=0; j<nz; j++) { 48 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 49 } 50 51 /* load in initial (unfactored row) */ 52 nz = ai[r[i]+1] - ai[r[i]]; 53 ajtmp = aj + ai[r[i]]; 54 v = aa + bs2*ai[r[i]]; 55 for (j=0; j<nz; j++) { 56 ierr = PetscMemcpy(rtmp+bs2*ic[ajtmp[j]],v+bs2*j,bs2*sizeof(MatScalar));CHKERRQ(ierr); 57 } 58 59 /* elimination */ 60 bjtmp = bj + bi[i]; 61 nzL = bi[i+1] - bi[i]; 62 for (k=0; k < nzL; k++) { 63 row = bjtmp[k]; 64 pc = rtmp + bs2*row; 65 for (flg=0,j=0; j<bs2; j++) { 66 if (pc[j] != (PetscScalar)0.0) { 67 flg = 1; 68 break; 69 } 70 } 71 if (flg) { 72 pv = b->a + bs2*bdiag[row]; 73 /* PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */ 74 ierr = PetscKernel_A_gets_A_times_B_2(pc,pv,mwork);CHKERRQ(ierr); 75 76 pj = b->j + bdiag[row+1]+1; /* begining of U(row,:) */ 77 pv = b->a + bs2*(bdiag[row+1]+1); 78 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries inU(row,:), excluding diag */ 79 for (j=0; j<nz; j++) { 80 /* PetscKernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); */ 81 /* rtmp+bs2*pj[j] = rtmp+bs2*pj[j] - (*pc)*(pv+bs2*j) */ 82 v = rtmp + 4*pj[j]; 83 ierr = PetscKernel_A_gets_A_minus_B_times_C_2(v,pc,pv);CHKERRQ(ierr); 84 pv += 4; 85 } 86 ierr = PetscLogFlops(16*nz+12);CHKERRQ(ierr); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */ 87 } 88 } 89 90 /* finished row so stick it into b->a */ 91 /* L part */ 92 pv = b->a + bs2*bi[i]; 93 pj = b->j + bi[i]; 94 nz = bi[i+1] - bi[i]; 95 for (j=0; j<nz; j++) { 96 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 97 } 98 99 /* Mark diagonal and invert diagonal for simplier triangular solves */ 100 pv = b->a + bs2*bdiag[i]; 101 pj = b->j + bdiag[i]; 102 ierr = PetscMemcpy(pv,rtmp+bs2*pj[0],bs2*sizeof(MatScalar));CHKERRQ(ierr); 103 104 ierr = PetscKernel_A_gets_inverse_A_2(pv,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 105 if (zeropivotdetected) B->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 106 107 /* U part */ 108 pv = b->a + bs2*(bdiag[i+1]+1); 109 pj = b->j + bdiag[i+1]+1; 110 nz = bdiag[i] - bdiag[i+1] - 1; 111 for (j=0; j<nz; j++) { 112 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 113 } 114 } 115 116 ierr = PetscFree2(rtmp,mwork);CHKERRQ(ierr); 117 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 118 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 119 120 C->ops->solve = MatSolve_SeqBAIJ_2; 121 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2; 122 C->assembled = PETSC_TRUE; 123 124 ierr = PetscLogFlops(1.333333333333*2*2*2*n);CHKERRQ(ierr); /* from inverting diagonal blocks */ 125 PetscFunctionReturn(0); 126 } 127 128 #undef __FUNCT__ 129 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering" 130 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat B,Mat A,const MatFactorInfo *info) 131 { 132 Mat C =B; 133 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)C->data; 134 PetscErrorCode ierr; 135 PetscInt i,j,k,nz,nzL,row,*pj; 136 const PetscInt n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,bs2=a->bs2; 137 const PetscInt *ajtmp,*bjtmp,*bdiag=b->diag; 138 MatScalar *rtmp,*pc,*mwork,*pv; 139 MatScalar *aa=a->a,*v; 140 PetscInt flg; 141 PetscReal shift = info->shiftamount; 142 PetscBool allowzeropivot,zeropivotdetected; 143 144 PetscFunctionBegin; 145 allowzeropivot = PetscNot(A->erroriffailure); 146 147 /* generate work space needed by the factorization */ 148 ierr = PetscMalloc2(bs2*n,&rtmp,bs2,&mwork);CHKERRQ(ierr); 149 ierr = PetscMemzero(rtmp,bs2*n*sizeof(MatScalar));CHKERRQ(ierr); 150 151 for (i=0; i<n; i++) { 152 /* zero rtmp */ 153 /* L part */ 154 nz = bi[i+1] - bi[i]; 155 bjtmp = bj + bi[i]; 156 for (j=0; j<nz; j++) { 157 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 158 } 159 160 /* U part */ 161 nz = bdiag[i] - bdiag[i+1]; 162 bjtmp = bj + bdiag[i+1]+1; 163 for (j=0; j<nz; j++) { 164 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 165 } 166 167 /* load in initial (unfactored row) */ 168 nz = ai[i+1] - ai[i]; 169 ajtmp = aj + ai[i]; 170 v = aa + bs2*ai[i]; 171 for (j=0; j<nz; j++) { 172 ierr = PetscMemcpy(rtmp+bs2*ajtmp[j],v+bs2*j,bs2*sizeof(MatScalar));CHKERRQ(ierr); 173 } 174 175 /* elimination */ 176 bjtmp = bj + bi[i]; 177 nzL = bi[i+1] - bi[i]; 178 for (k=0; k < nzL; k++) { 179 row = bjtmp[k]; 180 pc = rtmp + bs2*row; 181 for (flg=0,j=0; j<bs2; j++) { 182 if (pc[j]!=(PetscScalar)0.0) { 183 flg = 1; 184 break; 185 } 186 } 187 if (flg) { 188 pv = b->a + bs2*bdiag[row]; 189 /* PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */ 190 ierr = PetscKernel_A_gets_A_times_B_2(pc,pv,mwork);CHKERRQ(ierr); 191 192 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 193 pv = b->a + bs2*(bdiag[row+1]+1); 194 nz = bdiag[row]-bdiag[row+1] - 1; /* num of entries in U(row,:) excluding diag */ 195 for (j=0; j<nz; j++) { 196 /* PetscKernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); */ 197 /* rtmp+bs2*pj[j] = rtmp+bs2*pj[j] - (*pc)*(pv+bs2*j) */ 198 v = rtmp + 4*pj[j]; 199 ierr = PetscKernel_A_gets_A_minus_B_times_C_2(v,pc,pv);CHKERRQ(ierr); 200 pv += 4; 201 } 202 ierr = PetscLogFlops(16*nz+12);CHKERRQ(ierr); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */ 203 } 204 } 205 206 /* finished row so stick it into b->a */ 207 /* L part */ 208 pv = b->a + bs2*bi[i]; 209 pj = b->j + bi[i]; 210 nz = bi[i+1] - bi[i]; 211 for (j=0; j<nz; j++) { 212 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 213 } 214 215 /* Mark diagonal and invert diagonal for simplier triangular solves */ 216 pv = b->a + bs2*bdiag[i]; 217 pj = b->j + bdiag[i]; 218 ierr = PetscMemcpy(pv,rtmp+bs2*pj[0],bs2*sizeof(MatScalar));CHKERRQ(ierr); 219 220 ierr = PetscKernel_A_gets_inverse_A_2(pv,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 221 if (zeropivotdetected) B->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 222 223 /* U part */ 224 /* 225 pv = b->a + bs2*bi[2*n-i]; 226 pj = b->j + bi[2*n-i]; 227 nz = bi[2*n-i+1] - bi[2*n-i] - 1; 228 */ 229 pv = b->a + bs2*(bdiag[i+1]+1); 230 pj = b->j + bdiag[i+1]+1; 231 nz = bdiag[i] - bdiag[i+1] - 1; 232 for (j=0; j<nz; j++) { 233 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 234 } 235 } 236 ierr = PetscFree2(rtmp,mwork);CHKERRQ(ierr); 237 238 C->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering; 239 C->ops->forwardsolve = MatForwardSolve_SeqBAIJ_2_NaturalOrdering; 240 C->ops->backwardsolve = MatBackwardSolve_SeqBAIJ_2_NaturalOrdering; 241 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering; 242 C->assembled = PETSC_TRUE; 243 244 ierr = PetscLogFlops(1.333333333333*2*2*2*n);CHKERRQ(ierr); /* from inverting diagonal blocks */ 245 PetscFunctionReturn(0); 246 } 247 248 #undef __FUNCT__ 249 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_inplace" 250 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_inplace(Mat B,Mat A,const MatFactorInfo *info) 251 { 252 Mat C = B; 253 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)C->data; 254 IS isrow = b->row,isicol = b->icol; 255 PetscErrorCode ierr; 256 const PetscInt *r,*ic; 257 PetscInt i,j,n = a->mbs,*bi = b->i,*bj = b->j; 258 PetscInt *ajtmpold,*ajtmp,nz,row; 259 PetscInt *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj; 260 MatScalar *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4; 261 MatScalar p1,p2,p3,p4; 262 MatScalar *ba = b->a,*aa = a->a; 263 PetscReal shift = info->shiftamount; 264 PetscBool allowzeropivot,zeropivotdetected; 265 266 PetscFunctionBegin; 267 allowzeropivot = PetscNot(A->erroriffailure); 268 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 269 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 270 ierr = PetscMalloc1(4*(n+1),&rtmp);CHKERRQ(ierr); 271 272 for (i=0; i<n; i++) { 273 nz = bi[i+1] - bi[i]; 274 ajtmp = bj + bi[i]; 275 for (j=0; j<nz; j++) { 276 x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0; 277 } 278 /* load in initial (unfactored row) */ 279 idx = r[i]; 280 nz = ai[idx+1] - ai[idx]; 281 ajtmpold = aj + ai[idx]; 282 v = aa + 4*ai[idx]; 283 for (j=0; j<nz; j++) { 284 x = rtmp+4*ic[ajtmpold[j]]; 285 x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 286 v += 4; 287 } 288 row = *ajtmp++; 289 while (row < i) { 290 pc = rtmp + 4*row; 291 p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 292 if (p1 != (PetscScalar)0.0 || p2 != (PetscScalar)0.0 || p3 != (PetscScalar)0.0 || p4 != (PetscScalar)0.0) { 293 pv = ba + 4*diag_offset[row]; 294 pj = bj + diag_offset[row] + 1; 295 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 296 pc[0] = m1 = p1*x1 + p3*x2; 297 pc[1] = m2 = p2*x1 + p4*x2; 298 pc[2] = m3 = p1*x3 + p3*x4; 299 pc[3] = m4 = p2*x3 + p4*x4; 300 nz = bi[row+1] - diag_offset[row] - 1; 301 pv += 4; 302 for (j=0; j<nz; j++) { 303 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 304 x = rtmp + 4*pj[j]; 305 x[0] -= m1*x1 + m3*x2; 306 x[1] -= m2*x1 + m4*x2; 307 x[2] -= m1*x3 + m3*x4; 308 x[3] -= m2*x3 + m4*x4; 309 pv += 4; 310 } 311 ierr = PetscLogFlops(16.0*nz+12.0);CHKERRQ(ierr); 312 } 313 row = *ajtmp++; 314 } 315 /* finished row so stick it into b->a */ 316 pv = ba + 4*bi[i]; 317 pj = bj + bi[i]; 318 nz = bi[i+1] - bi[i]; 319 for (j=0; j<nz; j++) { 320 x = rtmp+4*pj[j]; 321 pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 322 pv += 4; 323 } 324 /* invert diagonal block */ 325 w = ba + 4*diag_offset[i]; 326 ierr = PetscKernel_A_gets_inverse_A_2(w,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 327 if (zeropivotdetected) C->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 328 } 329 330 ierr = PetscFree(rtmp);CHKERRQ(ierr); 331 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 332 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 333 334 C->ops->solve = MatSolve_SeqBAIJ_2_inplace; 335 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_inplace; 336 C->assembled = PETSC_TRUE; 337 338 ierr = PetscLogFlops(1.333333333333*8*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 339 PetscFunctionReturn(0); 340 } 341 /* 342 Version for when blocks are 2 by 2 Using natural ordering 343 */ 344 #undef __FUNCT__ 345 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace" 346 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace(Mat C,Mat A,const MatFactorInfo *info) 347 { 348 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)C->data; 349 PetscErrorCode ierr; 350 PetscInt i,j,n = a->mbs,*bi = b->i,*bj = b->j; 351 PetscInt *ajtmpold,*ajtmp,nz,row; 352 PetscInt *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj; 353 MatScalar *pv,*v,*rtmp,*pc,*w,*x; 354 MatScalar p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4; 355 MatScalar *ba = b->a,*aa = a->a; 356 PetscReal shift = info->shiftamount; 357 PetscBool allowzeropivot,zeropivotdetected; 358 359 PetscFunctionBegin; 360 allowzeropivot = PetscNot(A->erroriffailure); 361 ierr = PetscMalloc1(4*(n+1),&rtmp);CHKERRQ(ierr); 362 for (i=0; i<n; i++) { 363 nz = bi[i+1] - bi[i]; 364 ajtmp = bj + bi[i]; 365 for (j=0; j<nz; j++) { 366 x = rtmp+4*ajtmp[j]; 367 x[0] = x[1] = x[2] = x[3] = 0.0; 368 } 369 /* load in initial (unfactored row) */ 370 nz = ai[i+1] - ai[i]; 371 ajtmpold = aj + ai[i]; 372 v = aa + 4*ai[i]; 373 for (j=0; j<nz; j++) { 374 x = rtmp+4*ajtmpold[j]; 375 x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 376 v += 4; 377 } 378 row = *ajtmp++; 379 while (row < i) { 380 pc = rtmp + 4*row; 381 p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 382 if (p1 != (PetscScalar)0.0 || p2 != (PetscScalar)0.0 || p3 != (PetscScalar)0.0 || p4 != (PetscScalar)0.0) { 383 pv = ba + 4*diag_offset[row]; 384 pj = bj + diag_offset[row] + 1; 385 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 386 pc[0] = m1 = p1*x1 + p3*x2; 387 pc[1] = m2 = p2*x1 + p4*x2; 388 pc[2] = m3 = p1*x3 + p3*x4; 389 pc[3] = m4 = p2*x3 + p4*x4; 390 nz = bi[row+1] - diag_offset[row] - 1; 391 pv += 4; 392 for (j=0; j<nz; j++) { 393 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 394 x = rtmp + 4*pj[j]; 395 x[0] -= m1*x1 + m3*x2; 396 x[1] -= m2*x1 + m4*x2; 397 x[2] -= m1*x3 + m3*x4; 398 x[3] -= m2*x3 + m4*x4; 399 pv += 4; 400 } 401 ierr = PetscLogFlops(16.0*nz+12.0);CHKERRQ(ierr); 402 } 403 row = *ajtmp++; 404 } 405 /* finished row so stick it into b->a */ 406 pv = ba + 4*bi[i]; 407 pj = bj + bi[i]; 408 nz = bi[i+1] - bi[i]; 409 for (j=0; j<nz; j++) { 410 x = rtmp+4*pj[j]; 411 pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 412 /* 413 printf(" col %d:",pj[j]); 414 PetscInt j1; 415 for (j1=0; j1<4; j1++) printf(" %g,",*(pv+j1)); 416 printf("\n"); 417 */ 418 pv += 4; 419 } 420 /* invert diagonal block */ 421 w = ba + 4*diag_offset[i]; 422 ierr = PetscKernel_A_gets_inverse_A_2(w,shift, allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 423 if (zeropivotdetected) C->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 424 } 425 426 ierr = PetscFree(rtmp);CHKERRQ(ierr); 427 428 C->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering_inplace; 429 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering_inplace; 430 C->assembled = PETSC_TRUE; 431 432 ierr = PetscLogFlops(1.333333333333*8*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 433 PetscFunctionReturn(0); 434 } 435 436 /* ----------------------------------------------------------- */ 437 /* 438 Version for when blocks are 1 by 1. 439 */ 440 #undef __FUNCT__ 441 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_1" 442 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat B,Mat A,const MatFactorInfo *info) 443 { 444 Mat C =B; 445 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)C->data; 446 IS isrow = b->row,isicol = b->icol; 447 PetscErrorCode ierr; 448 const PetscInt *r,*ic,*ics; 449 const PetscInt n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag; 450 PetscInt i,j,k,nz,nzL,row,*pj; 451 const PetscInt *ajtmp,*bjtmp; 452 MatScalar *rtmp,*pc,multiplier,*pv; 453 const MatScalar *aa=a->a,*v; 454 PetscBool row_identity,col_identity; 455 FactorShiftCtx sctx; 456 const PetscInt *ddiag; 457 PetscReal rs; 458 MatScalar d; 459 460 PetscFunctionBegin; 461 /* MatPivotSetUp(): initialize shift context sctx */ 462 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 463 464 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 465 ddiag = a->diag; 466 sctx.shift_top = info->zeropivot; 467 for (i=0; i<n; i++) { 468 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 469 d = (aa)[ddiag[i]]; 470 rs = -PetscAbsScalar(d) - PetscRealPart(d); 471 v = aa+ai[i]; 472 nz = ai[i+1] - ai[i]; 473 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 474 if (rs>sctx.shift_top) sctx.shift_top = rs; 475 } 476 sctx.shift_top *= 1.1; 477 sctx.nshift_max = 5; 478 sctx.shift_lo = 0.; 479 sctx.shift_hi = 1.; 480 } 481 482 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 483 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 484 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 485 ics = ic; 486 487 do { 488 sctx.newshift = PETSC_FALSE; 489 for (i=0; i<n; i++) { 490 /* zero rtmp */ 491 /* L part */ 492 nz = bi[i+1] - bi[i]; 493 bjtmp = bj + bi[i]; 494 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 495 496 /* U part */ 497 nz = bdiag[i]-bdiag[i+1]; 498 bjtmp = bj + bdiag[i+1]+1; 499 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 500 501 /* load in initial (unfactored row) */ 502 nz = ai[r[i]+1] - ai[r[i]]; 503 ajtmp = aj + ai[r[i]]; 504 v = aa + ai[r[i]]; 505 for (j=0; j<nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 506 507 /* ZeropivotApply() */ 508 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 509 510 /* elimination */ 511 bjtmp = bj + bi[i]; 512 row = *bjtmp++; 513 nzL = bi[i+1] - bi[i]; 514 for (k=0; k < nzL; k++) { 515 pc = rtmp + row; 516 if (*pc != (PetscScalar)0.0) { 517 pv = b->a + bdiag[row]; 518 multiplier = *pc * (*pv); 519 *pc = multiplier; 520 521 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 522 pv = b->a + bdiag[row+1]+1; 523 nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */ 524 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 525 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 526 } 527 row = *bjtmp++; 528 } 529 530 /* finished row so stick it into b->a */ 531 rs = 0.0; 532 /* L part */ 533 pv = b->a + bi[i]; 534 pj = b->j + bi[i]; 535 nz = bi[i+1] - bi[i]; 536 for (j=0; j<nz; j++) { 537 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 538 } 539 540 /* U part */ 541 pv = b->a + bdiag[i+1]+1; 542 pj = b->j + bdiag[i+1]+1; 543 nz = bdiag[i] - bdiag[i+1]-1; 544 for (j=0; j<nz; j++) { 545 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 546 } 547 548 sctx.rs = rs; 549 sctx.pv = rtmp[i]; 550 ierr = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr); 551 if (sctx.newshift) break; /* break for-loop */ 552 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 553 554 /* Mark diagonal and invert diagonal for simplier triangular solves */ 555 pv = b->a + bdiag[i]; 556 *pv = (PetscScalar)1.0/rtmp[i]; 557 558 } /* endof for (i=0; i<n; i++) { */ 559 560 /* MatPivotRefine() */ 561 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 562 /* 563 * if no shift in this attempt & shifting & started shifting & can refine, 564 * then try lower shift 565 */ 566 sctx.shift_hi = sctx.shift_fraction; 567 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 568 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 569 sctx.newshift = PETSC_TRUE; 570 sctx.nshift++; 571 } 572 } while (sctx.newshift); 573 574 ierr = PetscFree(rtmp);CHKERRQ(ierr); 575 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 576 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 577 578 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 579 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 580 if (row_identity && col_identity) { 581 C->ops->solve = MatSolve_SeqBAIJ_1_NaturalOrdering; 582 C->ops->forwardsolve = MatForwardSolve_SeqBAIJ_1_NaturalOrdering; 583 C->ops->backwardsolve = MatBackwardSolve_SeqBAIJ_1_NaturalOrdering; 584 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_NaturalOrdering; 585 } else { 586 C->ops->solve = MatSolve_SeqBAIJ_1; 587 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1; 588 } 589 C->assembled = PETSC_TRUE; 590 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 591 592 /* MatShiftView(A,info,&sctx) */ 593 if (sctx.nshift) { 594 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 595 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 596 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 597 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 598 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 599 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr); 600 } 601 } 602 PetscFunctionReturn(0); 603 } 604 605 #undef __FUNCT__ 606 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_1_inplace" 607 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1_inplace(Mat C,Mat A,const MatFactorInfo *info) 608 { 609 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)C->data; 610 IS isrow = b->row,isicol = b->icol; 611 PetscErrorCode ierr; 612 const PetscInt *r,*ic; 613 PetscInt i,j,n = a->mbs,*bi = b->i,*bj = b->j; 614 PetscInt *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j; 615 PetscInt *diag_offset = b->diag,diag,*pj; 616 MatScalar *pv,*v,*rtmp,multiplier,*pc; 617 MatScalar *ba = b->a,*aa = a->a; 618 PetscBool row_identity, col_identity; 619 620 PetscFunctionBegin; 621 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 622 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 623 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 624 625 for (i=0; i<n; i++) { 626 nz = bi[i+1] - bi[i]; 627 ajtmp = bj + bi[i]; 628 for (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0; 629 630 /* load in initial (unfactored row) */ 631 nz = ai[r[i]+1] - ai[r[i]]; 632 ajtmpold = aj + ai[r[i]]; 633 v = aa + ai[r[i]]; 634 for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] = v[j]; 635 636 row = *ajtmp++; 637 while (row < i) { 638 pc = rtmp + row; 639 if (*pc != 0.0) { 640 pv = ba + diag_offset[row]; 641 pj = bj + diag_offset[row] + 1; 642 multiplier = *pc * *pv++; 643 *pc = multiplier; 644 nz = bi[row+1] - diag_offset[row] - 1; 645 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 646 ierr = PetscLogFlops(1.0+2.0*nz);CHKERRQ(ierr); 647 } 648 row = *ajtmp++; 649 } 650 /* finished row so stick it into b->a */ 651 pv = ba + bi[i]; 652 pj = bj + bi[i]; 653 nz = bi[i+1] - bi[i]; 654 for (j=0; j<nz; j++) pv[j] = rtmp[pj[j]]; 655 diag = diag_offset[i] - bi[i]; 656 /* check pivot entry for current row */ 657 if (pv[diag] == 0.0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot: row in original ordering %D in permuted ordering %D",r[i],i); 658 pv[diag] = 1.0/pv[diag]; 659 } 660 661 ierr = PetscFree(rtmp);CHKERRQ(ierr); 662 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 663 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 664 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 665 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 666 if (row_identity && col_identity) { 667 C->ops->solve = MatSolve_SeqBAIJ_1_NaturalOrdering_inplace; 668 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_NaturalOrdering_inplace; 669 } else { 670 C->ops->solve = MatSolve_SeqBAIJ_1_inplace; 671 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_inplace; 672 } 673 C->assembled = PETSC_TRUE; 674 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 675 PetscFunctionReturn(0); 676 } 677 678 #undef __FUNCT__ 679 #define __FUNCT__ "MatGetFactor_seqbaij_petsc" 680 PETSC_INTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat A,const MatFactorType ftype,Mat *B) 681 { 682 PetscInt n = A->rmap->n; 683 PetscErrorCode ierr; 684 685 PetscFunctionBegin; 686 #if defined(PETSC_USE_COMPLEX) 687 if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported"); 688 #endif 689 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 690 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 691 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 692 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 693 694 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqBAIJ; 695 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqBAIJ; 696 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 697 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 698 ierr = MatSeqSBAIJSetPreallocation(*B,A->rmap->bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 699 700 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqBAIJ; 701 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqBAIJ; 702 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 703 (*B)->factortype = ftype; 704 705 ierr = PetscFree((*B)->solvertype);CHKERRQ(ierr); 706 ierr = PetscStrallocpy(MATSOLVERPETSC,&(*B)->solvertype);CHKERRQ(ierr); 707 PetscFunctionReturn(0); 708 } 709 710 /* ----------------------------------------------------------- */ 711 #undef __FUNCT__ 712 #define __FUNCT__ "MatLUFactor_SeqBAIJ" 713 PetscErrorCode MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 714 { 715 PetscErrorCode ierr; 716 Mat C; 717 718 PetscFunctionBegin; 719 ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 720 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 721 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 722 723 A->ops->solve = C->ops->solve; 724 A->ops->solvetranspose = C->ops->solvetranspose; 725 726 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 727 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqBAIJ*)(A->data))->icol);CHKERRQ(ierr); 728 PetscFunctionReturn(0); 729 } 730 731 #include <../src/mat/impls/sbaij/seq/sbaij.h> 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqBAIJ_N" 734 PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N(Mat C,Mat A,const MatFactorInfo *info) 735 { 736 PetscErrorCode ierr; 737 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 738 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 739 IS ip=b->row; 740 const PetscInt *rip; 741 PetscInt i,j,mbs=a->mbs,bs=A->rmap->bs,*bi=b->i,*bj=b->j,*bcol; 742 PetscInt *ai=a->i,*aj=a->j; 743 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 744 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 745 PetscReal rs; 746 FactorShiftCtx sctx; 747 748 PetscFunctionBegin; 749 if (bs > 1) { /* convert A to a SBAIJ matrix and apply Cholesky factorization from it */ 750 if (!a->sbaijMat) { 751 ierr = MatConvert(A,MATSEQSBAIJ,MAT_INITIAL_MATRIX,&a->sbaijMat);CHKERRQ(ierr); 752 } 753 ierr = (a->sbaijMat)->ops->choleskyfactornumeric(C,a->sbaijMat,info);CHKERRQ(ierr); 754 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 755 PetscFunctionReturn(0); 756 } 757 758 /* MatPivotSetUp(): initialize shift context sctx */ 759 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 760 761 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 762 ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);CHKERRQ(ierr); 763 764 sctx.shift_amount = 0.; 765 sctx.nshift = 0; 766 do { 767 sctx.newshift = PETSC_FALSE; 768 for (i=0; i<mbs; i++) { 769 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 770 } 771 772 for (k = 0; k<mbs; k++) { 773 bval = ba + bi[k]; 774 /* initialize k-th row by the perm[k]-th row of A */ 775 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 776 for (j = jmin; j < jmax; j++) { 777 col = rip[aj[j]]; 778 if (col >= k) { /* only take upper triangular entry */ 779 rtmp[col] = aa[j]; 780 *bval++ = 0.0; /* for in-place factorization */ 781 } 782 } 783 784 /* shift the diagonal of the matrix */ 785 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 786 787 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 788 dk = rtmp[k]; 789 i = jl[k]; /* first row to be added to k_th row */ 790 791 while (i < k) { 792 nexti = jl[i]; /* next row to be added to k_th row */ 793 794 /* compute multiplier, update diag(k) and U(i,k) */ 795 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 796 uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */ 797 dk += uikdi*ba[ili]; 798 ba[ili] = uikdi; /* -U(i,k) */ 799 800 /* add multiple of row i to k-th row */ 801 jmin = ili + 1; jmax = bi[i+1]; 802 if (jmin < jmax) { 803 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 804 /* update il and jl for row i */ 805 il[i] = jmin; 806 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 807 } 808 i = nexti; 809 } 810 811 /* shift the diagonals when zero pivot is detected */ 812 /* compute rs=sum of abs(off-diagonal) */ 813 rs = 0.0; 814 jmin = bi[k]+1; 815 nz = bi[k+1] - jmin; 816 if (nz) { 817 bcol = bj + jmin; 818 while (nz--) { 819 rs += PetscAbsScalar(rtmp[*bcol]); 820 bcol++; 821 } 822 } 823 824 sctx.rs = rs; 825 sctx.pv = dk; 826 ierr = MatPivotCheck(C,A,info,&sctx,k);CHKERRQ(ierr); 827 if (sctx.newshift) break; 828 dk = sctx.pv; 829 830 /* copy data into U(k,:) */ 831 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 832 jmin = bi[k]+1; jmax = bi[k+1]; 833 if (jmin < jmax) { 834 for (j=jmin; j<jmax; j++) { 835 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 836 } 837 /* add the k-th row into il and jl */ 838 il[k] = jmin; 839 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 840 } 841 } 842 } while (sctx.newshift); 843 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 844 845 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 846 847 C->assembled = PETSC_TRUE; 848 C->preallocated = PETSC_TRUE; 849 850 ierr = PetscLogFlops(C->rmap->N);CHKERRQ(ierr); 851 if (sctx.nshift) { 852 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 853 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 854 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 855 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 856 } 857 } 858 PetscFunctionReturn(0); 859 } 860 861 #undef __FUNCT__ 862 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering" 863 PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info) 864 { 865 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 866 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 867 PetscErrorCode ierr; 868 PetscInt i,j,am=a->mbs; 869 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 870 PetscInt k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz; 871 MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval; 872 PetscReal rs; 873 FactorShiftCtx sctx; 874 875 PetscFunctionBegin; 876 /* MatPivotSetUp(): initialize shift context sctx */ 877 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 878 879 ierr = PetscMalloc3(am,&rtmp,am,&il,am,&jl);CHKERRQ(ierr); 880 881 do { 882 sctx.newshift = PETSC_FALSE; 883 for (i=0; i<am; i++) { 884 rtmp[i] = 0.0; jl[i] = am; il[0] = 0; 885 } 886 887 for (k = 0; k<am; k++) { 888 /* initialize k-th row with elements nonzero in row perm(k) of A */ 889 nz = ai[k+1] - ai[k]; 890 acol = aj + ai[k]; 891 aval = aa + ai[k]; 892 bval = ba + bi[k]; 893 while (nz--) { 894 if (*acol < k) { /* skip lower triangular entries */ 895 acol++; aval++; 896 } else { 897 rtmp[*acol++] = *aval++; 898 *bval++ = 0.0; /* for in-place factorization */ 899 } 900 } 901 902 /* shift the diagonal of the matrix */ 903 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 904 905 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 906 dk = rtmp[k]; 907 i = jl[k]; /* first row to be added to k_th row */ 908 909 while (i < k) { 910 nexti = jl[i]; /* next row to be added to k_th row */ 911 /* compute multiplier, update D(k) and U(i,k) */ 912 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 913 uikdi = -ba[ili]*ba[bi[i]]; 914 dk += uikdi*ba[ili]; 915 ba[ili] = uikdi; /* -U(i,k) */ 916 917 /* add multiple of row i to k-th row ... */ 918 jmin = ili + 1; 919 nz = bi[i+1] - jmin; 920 if (nz > 0) { 921 bcol = bj + jmin; 922 bval = ba + jmin; 923 while (nz--) rtmp[*bcol++] += uikdi*(*bval++); 924 /* update il and jl for i-th row */ 925 il[i] = jmin; 926 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 927 } 928 i = nexti; 929 } 930 931 /* shift the diagonals when zero pivot is detected */ 932 /* compute rs=sum of abs(off-diagonal) */ 933 rs = 0.0; 934 jmin = bi[k]+1; 935 nz = bi[k+1] - jmin; 936 if (nz) { 937 bcol = bj + jmin; 938 while (nz--) { 939 rs += PetscAbsScalar(rtmp[*bcol]); 940 bcol++; 941 } 942 } 943 944 sctx.rs = rs; 945 sctx.pv = dk; 946 ierr = MatPivotCheck(C,A,info,&sctx,k);CHKERRQ(ierr); 947 if (sctx.newshift) break; /* sctx.shift_amount is updated */ 948 dk = sctx.pv; 949 950 /* copy data into U(k,:) */ 951 ba[bi[k]] = 1.0/dk; 952 jmin = bi[k]+1; 953 nz = bi[k+1] - jmin; 954 if (nz) { 955 bcol = bj + jmin; 956 bval = ba + jmin; 957 while (nz--) { 958 *bval++ = rtmp[*bcol]; 959 rtmp[*bcol++] = 0.0; 960 } 961 /* add k-th row into il and jl */ 962 il[k] = jmin; 963 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 964 } 965 } 966 } while (sctx.newshift); 967 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 968 969 C->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 970 C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 971 C->assembled = PETSC_TRUE; 972 C->preallocated = PETSC_TRUE; 973 974 ierr = PetscLogFlops(C->rmap->N);CHKERRQ(ierr); 975 if (sctx.nshift) { 976 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 977 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 978 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 979 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 980 } 981 } 982 PetscFunctionReturn(0); 983 } 984 985 #include <petscbt.h> 986 #include <../src/mat/utils/freespace.h> 987 #undef __FUNCT__ 988 #define __FUNCT__ "MatICCFactorSymbolic_SeqBAIJ" 989 PetscErrorCode MatICCFactorSymbolic_SeqBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 990 { 991 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 992 Mat_SeqSBAIJ *b; 993 Mat B; 994 PetscErrorCode ierr; 995 PetscBool perm_identity,missing; 996 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=a->mbs,bs=A->rmap->bs,*ui; 997 const PetscInt *rip; 998 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 999 PetscInt nlnk,*lnk,*lnk_lvl=NULL,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 1000 PetscReal fill =info->fill,levels=info->levels; 1001 PetscFreeSpaceList free_space =NULL,current_space=NULL; 1002 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 1003 PetscBT lnkbt; 1004 1005 PetscFunctionBegin; 1006 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1007 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1008 1009 if (bs > 1) { 1010 if (!a->sbaijMat) { 1011 ierr = MatConvert(A,MATSEQSBAIJ,MAT_INITIAL_MATRIX,&a->sbaijMat);CHKERRQ(ierr); 1012 } 1013 (fact)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqSBAIJ; /* undue the change made in MatGetFactor_seqbaij_petsc */ 1014 1015 ierr = MatICCFactorSymbolic(fact,a->sbaijMat,perm,info);CHKERRQ(ierr); 1016 PetscFunctionReturn(0); 1017 } 1018 1019 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1020 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1021 1022 /* special case that simply copies fill pattern */ 1023 if (!levels && perm_identity) { 1024 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 1025 for (i=0; i<am; i++) ui[i] = ai[i+1] - a->diag[i]; /* ui: rowlengths - changes when !perm_identity */ 1026 B = fact; 1027 ierr = MatSeqSBAIJSetPreallocation(B,1,0,ui);CHKERRQ(ierr); 1028 1029 1030 b = (Mat_SeqSBAIJ*)B->data; 1031 uj = b->j; 1032 for (i=0; i<am; i++) { 1033 aj = a->j + a->diag[i]; 1034 for (j=0; j<ui[i]; j++) *uj++ = *aj++; 1035 b->ilen[i] = ui[i]; 1036 } 1037 ierr = PetscFree(ui);CHKERRQ(ierr); 1038 1039 B->factortype = MAT_FACTOR_NONE; 1040 1041 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1042 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1043 B->factortype = MAT_FACTOR_ICC; 1044 1045 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 1046 PetscFunctionReturn(0); 1047 } 1048 1049 /* initialization */ 1050 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 1051 ui[0] = 0; 1052 ierr = PetscMalloc1(2*am+1,&cols_lvl);CHKERRQ(ierr); 1053 1054 /* jl: linked list for storing indices of the pivot rows 1055 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1056 ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&il,am,&jl);CHKERRQ(ierr); 1057 for (i=0; i<am; i++) { 1058 jl[i] = am; il[i] = 0; 1059 } 1060 1061 /* create and initialize a linked list for storing column indices of the active row k */ 1062 nlnk = am + 1; 1063 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1064 1065 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 1066 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am]/2,am/2)),&free_space);CHKERRQ(ierr); 1067 1068 current_space = free_space; 1069 1070 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am]/2,am/2)),&free_space_lvl);CHKERRQ(ierr); 1071 current_space_lvl = free_space_lvl; 1072 1073 for (k=0; k<am; k++) { /* for each active row k */ 1074 /* initialize lnk by the column indices of row rip[k] of A */ 1075 nzk = 0; 1076 ncols = ai[rip[k]+1] - ai[rip[k]]; 1077 ncols_upper = 0; 1078 cols = cols_lvl + am; 1079 for (j=0; j<ncols; j++) { 1080 i = rip[*(aj + ai[rip[k]] + j)]; 1081 if (i >= k) { /* only take upper triangular entry */ 1082 cols[ncols_upper] = i; 1083 cols_lvl[ncols_upper] = -1; /* initialize level for nonzero entries */ 1084 ncols_upper++; 1085 } 1086 } 1087 ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1088 nzk += nlnk; 1089 1090 /* update lnk by computing fill-in for each pivot row to be merged in */ 1091 prow = jl[k]; /* 1st pivot row */ 1092 1093 while (prow < k) { 1094 nextprow = jl[prow]; 1095 1096 /* merge prow into k-th row */ 1097 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1098 jmax = ui[prow+1]; 1099 ncols = jmax-jmin; 1100 i = jmin - ui[prow]; 1101 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1102 for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j); 1103 ierr = PetscIncompleteLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1104 nzk += nlnk; 1105 1106 /* update il and jl for prow */ 1107 if (jmin < jmax) { 1108 il[prow] = jmin; 1109 1110 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1111 } 1112 prow = nextprow; 1113 } 1114 1115 /* if free space is not available, make more free space */ 1116 if (current_space->local_remaining<nzk) { 1117 i = am - k + 1; /* num of unfactored rows */ 1118 i = PetscMin(PetscIntMultTruncate(i,nzk), PetscIntMultTruncate(i,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1119 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1120 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1121 reallocs++; 1122 } 1123 1124 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1125 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1126 1127 /* add the k-th row into il and jl */ 1128 if (nzk-1 > 0) { 1129 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1130 jl[k] = jl[i]; jl[i] = k; 1131 il[k] = ui[k] + 1; 1132 } 1133 uj_ptr[k] = current_space->array; 1134 uj_lvl_ptr[k] = current_space_lvl->array; 1135 1136 current_space->array += nzk; 1137 current_space->local_used += nzk; 1138 current_space->local_remaining -= nzk; 1139 1140 current_space_lvl->array += nzk; 1141 current_space_lvl->local_used += nzk; 1142 current_space_lvl->local_remaining -= nzk; 1143 1144 ui[k+1] = ui[k] + nzk; 1145 } 1146 1147 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1148 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,il,jl);CHKERRQ(ierr); 1149 ierr = PetscFree(cols_lvl);CHKERRQ(ierr); 1150 1151 /* copy free_space into uj and free free_space; set uj in new datastructure; */ 1152 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 1153 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1154 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1155 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1156 1157 /* put together the new matrix in MATSEQSBAIJ format */ 1158 B = fact; 1159 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 1160 1161 b = (Mat_SeqSBAIJ*)B->data; 1162 b->singlemalloc = PETSC_FALSE; 1163 b->free_a = PETSC_TRUE; 1164 b->free_ij = PETSC_TRUE; 1165 1166 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 1167 1168 b->j = uj; 1169 b->i = ui; 1170 b->diag = 0; 1171 b->ilen = 0; 1172 b->imax = 0; 1173 b->row = perm; 1174 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1175 1176 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1177 1178 b->icol = perm; 1179 1180 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1181 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 1182 ierr = PetscLogObjectMemory((PetscObject)B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1183 1184 b->maxnz = b->nz = ui[am]; 1185 1186 B->info.factor_mallocs = reallocs; 1187 B->info.fill_ratio_given = fill; 1188 if (ai[am] != 0.) { 1189 /* nonzeros in lower triangular part of A (includign diagonals)= (ai[am]+am)/2 */ 1190 B->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 1191 } else { 1192 B->info.fill_ratio_needed = 0.0; 1193 } 1194 #if defined(PETSC_USE_INFO) 1195 if (ai[am] != 0) { 1196 PetscReal af = B->info.fill_ratio_needed; 1197 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 1198 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 1199 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 1200 } else { 1201 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1202 } 1203 #endif 1204 if (perm_identity) { 1205 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1206 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1207 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 1208 } else { 1209 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N; 1210 } 1211 PetscFunctionReturn(0); 1212 } 1213 1214 #undef __FUNCT__ 1215 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqBAIJ" 1216 PetscErrorCode MatCholeskyFactorSymbolic_SeqBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 1217 { 1218 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1219 Mat_SeqSBAIJ *b; 1220 Mat B; 1221 PetscErrorCode ierr; 1222 PetscBool perm_identity,missing; 1223 PetscReal fill = info->fill; 1224 const PetscInt *rip; 1225 PetscInt i,mbs=a->mbs,bs=A->rmap->bs,*ai=a->i,*aj=a->j,reallocs=0,prow; 1226 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1227 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1228 PetscFreeSpaceList free_space=NULL,current_space=NULL; 1229 PetscBT lnkbt; 1230 1231 PetscFunctionBegin; 1232 if (bs > 1) { /* convert to seqsbaij */ 1233 if (!a->sbaijMat) { 1234 ierr = MatConvert(A,MATSEQSBAIJ,MAT_INITIAL_MATRIX,&a->sbaijMat);CHKERRQ(ierr); 1235 } 1236 (fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqSBAIJ; /* undue the change made in MatGetFactor_seqbaij_petsc */ 1237 1238 ierr = MatCholeskyFactorSymbolic(fact,a->sbaijMat,perm,info);CHKERRQ(ierr); 1239 PetscFunctionReturn(0); 1240 } 1241 1242 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1243 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1244 1245 /* check whether perm is the identity mapping */ 1246 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1247 if (!perm_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix reordering is not supported"); 1248 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1249 1250 /* initialization */ 1251 ierr = PetscMalloc1(mbs+1,&ui);CHKERRQ(ierr); 1252 ui[0] = 0; 1253 1254 /* jl: linked list for storing indices of the pivot rows 1255 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 1256 ierr = PetscMalloc4(mbs,&ui_ptr,mbs,&il,mbs,&jl,mbs,&cols);CHKERRQ(ierr); 1257 for (i=0; i<mbs; i++) { 1258 jl[i] = mbs; il[i] = 0; 1259 } 1260 1261 /* create and initialize a linked list for storing column indices of the active row k */ 1262 nlnk = mbs + 1; 1263 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1264 1265 /* initial FreeSpace size is fill* (ai[mbs]+mbs)/2 */ 1266 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[mbs]/2,mbs/2)),&free_space);CHKERRQ(ierr); 1267 1268 current_space = free_space; 1269 1270 for (k=0; k<mbs; k++) { /* for each active row k */ 1271 /* initialize lnk by the column indices of row rip[k] of A */ 1272 nzk = 0; 1273 ncols = ai[rip[k]+1] - ai[rip[k]]; 1274 ncols_upper = 0; 1275 for (j=0; j<ncols; j++) { 1276 i = rip[*(aj + ai[rip[k]] + j)]; 1277 if (i >= k) { /* only take upper triangular entry */ 1278 cols[ncols_upper] = i; 1279 ncols_upper++; 1280 } 1281 } 1282 ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1283 nzk += nlnk; 1284 1285 /* update lnk by computing fill-in for each pivot row to be merged in */ 1286 prow = jl[k]; /* 1st pivot row */ 1287 1288 while (prow < k) { 1289 nextprow = jl[prow]; 1290 /* merge prow into k-th row */ 1291 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 1292 jmax = ui[prow+1]; 1293 ncols = jmax-jmin; 1294 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 1295 ierr = PetscLLAddSorted(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1296 nzk += nlnk; 1297 1298 /* update il and jl for prow */ 1299 if (jmin < jmax) { 1300 il[prow] = jmin; 1301 j = *uj_ptr; 1302 jl[prow] = jl[j]; 1303 jl[j] = prow; 1304 } 1305 prow = nextprow; 1306 } 1307 1308 /* if free space is not available, make more free space */ 1309 if (current_space->local_remaining<nzk) { 1310 i = mbs - k + 1; /* num of unfactored rows */ 1311 i = PetscMin(PetscIntMultTruncate(i,nzk), PetscIntMultTruncate(i,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1312 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1313 reallocs++; 1314 } 1315 1316 /* copy data into free space, then initialize lnk */ 1317 ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1318 1319 /* add the k-th row into il and jl */ 1320 if (nzk-1 > 0) { 1321 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 1322 jl[k] = jl[i]; jl[i] = k; 1323 il[k] = ui[k] + 1; 1324 } 1325 ui_ptr[k] = current_space->array; 1326 current_space->array += nzk; 1327 current_space->local_used += nzk; 1328 current_space->local_remaining -= nzk; 1329 1330 ui[k+1] = ui[k] + nzk; 1331 } 1332 1333 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1334 ierr = PetscFree4(ui_ptr,il,jl,cols);CHKERRQ(ierr); 1335 1336 /* copy free_space into uj and free free_space; set uj in new datastructure; */ 1337 ierr = PetscMalloc1(ui[mbs]+1,&uj);CHKERRQ(ierr); 1338 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1339 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1340 1341 /* put together the new matrix in MATSEQSBAIJ format */ 1342 B = fact; 1343 ierr = MatSeqSBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 1344 1345 b = (Mat_SeqSBAIJ*)B->data; 1346 b->singlemalloc = PETSC_FALSE; 1347 b->free_a = PETSC_TRUE; 1348 b->free_ij = PETSC_TRUE; 1349 1350 ierr = PetscMalloc1(ui[mbs]+1,&b->a);CHKERRQ(ierr); 1351 1352 b->j = uj; 1353 b->i = ui; 1354 b->diag = 0; 1355 b->ilen = 0; 1356 b->imax = 0; 1357 b->row = perm; 1358 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1359 1360 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1361 b->icol = perm; 1362 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1363 ierr = PetscMalloc1(mbs+1,&b->solve_work);CHKERRQ(ierr); 1364 ierr = PetscLogObjectMemory((PetscObject)B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1365 b->maxnz = b->nz = ui[mbs]; 1366 1367 B->info.factor_mallocs = reallocs; 1368 B->info.fill_ratio_given = fill; 1369 if (ai[mbs] != 0.) { 1370 /* nonzeros in lower triangular part of A = (ai[mbs]+mbs)/2 */ 1371 B->info.fill_ratio_needed = ((PetscReal)2*ui[mbs])/(ai[mbs]+mbs); 1372 } else { 1373 B->info.fill_ratio_needed = 0.0; 1374 } 1375 #if defined(PETSC_USE_INFO) 1376 if (ai[mbs] != 0.) { 1377 PetscReal af = B->info.fill_ratio_needed; 1378 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 1379 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 1380 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 1381 } else { 1382 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1383 } 1384 #endif 1385 if (perm_identity) { 1386 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 1387 } else { 1388 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N; 1389 } 1390 PetscFunctionReturn(0); 1391 } 1392 1393 #undef __FUNCT__ 1394 #define __FUNCT__ "MatSolve_SeqBAIJ_N_NaturalOrdering" 1395 PetscErrorCode MatSolve_SeqBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 1396 { 1397 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1398 PetscErrorCode ierr; 1399 const PetscInt *ai=a->i,*aj=a->j,*adiag=a->diag,*vi; 1400 PetscInt i,k,n=a->mbs; 1401 PetscInt nz,bs=A->rmap->bs,bs2=a->bs2; 1402 const MatScalar *aa=a->a,*v; 1403 PetscScalar *x,*s,*t,*ls; 1404 const PetscScalar *b; 1405 1406 PetscFunctionBegin; 1407 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1408 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1409 t = a->solve_work; 1410 1411 /* forward solve the lower triangular */ 1412 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* copy 1st block of b to t */ 1413 1414 for (i=1; i<n; i++) { 1415 v = aa + bs2*ai[i]; 1416 vi = aj + ai[i]; 1417 nz = ai[i+1] - ai[i]; 1418 s = t + bs*i; 1419 ierr = PetscMemcpy(s,b+bs*i,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* copy i_th block of b to t */ 1420 for (k=0;k<nz;k++) { 1421 PetscKernel_v_gets_v_minus_A_times_w(bs,s,v,t+bs*vi[k]); 1422 v += bs2; 1423 } 1424 } 1425 1426 /* backward solve the upper triangular */ 1427 ls = a->solve_work + A->cmap->n; 1428 for (i=n-1; i>=0; i--) { 1429 v = aa + bs2*(adiag[i+1]+1); 1430 vi = aj + adiag[i+1]+1; 1431 nz = adiag[i] - adiag[i+1]-1; 1432 ierr = PetscMemcpy(ls,t+i*bs,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1433 for (k=0; k<nz; k++) { 1434 PetscKernel_v_gets_v_minus_A_times_w(bs,ls,v,t+bs*vi[k]); 1435 v += bs2; 1436 } 1437 PetscKernel_w_gets_A_times_v(bs,ls,aa+bs2*adiag[i],t+i*bs); /* *inv(diagonal[i]) */ 1438 ierr = PetscMemcpy(x+i*bs,t+i*bs,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1439 } 1440 1441 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1442 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1443 ierr = PetscLogFlops(2.0*(a->bs2)*(a->nz) - A->rmap->bs*A->cmap->n);CHKERRQ(ierr); 1444 PetscFunctionReturn(0); 1445 } 1446 1447 #undef __FUNCT__ 1448 #define __FUNCT__ "MatSolve_SeqBAIJ_N" 1449 PetscErrorCode MatSolve_SeqBAIJ_N(Mat A,Vec bb,Vec xx) 1450 { 1451 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data; 1452 IS iscol=a->col,isrow=a->row; 1453 PetscErrorCode ierr; 1454 const PetscInt *r,*c,*rout,*cout,*ai=a->i,*aj=a->j,*adiag=a->diag,*vi; 1455 PetscInt i,m,n=a->mbs; 1456 PetscInt nz,bs=A->rmap->bs,bs2=a->bs2; 1457 const MatScalar *aa=a->a,*v; 1458 PetscScalar *x,*s,*t,*ls; 1459 const PetscScalar *b; 1460 1461 PetscFunctionBegin; 1462 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1463 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1464 t = a->solve_work; 1465 1466 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1467 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1468 1469 /* forward solve the lower triangular */ 1470 ierr = PetscMemcpy(t,b+bs*r[0],bs*sizeof(PetscScalar));CHKERRQ(ierr); 1471 for (i=1; i<n; i++) { 1472 v = aa + bs2*ai[i]; 1473 vi = aj + ai[i]; 1474 nz = ai[i+1] - ai[i]; 1475 s = t + bs*i; 1476 ierr = PetscMemcpy(s,b+bs*r[i],bs*sizeof(PetscScalar));CHKERRQ(ierr); 1477 for (m=0; m<nz; m++) { 1478 PetscKernel_v_gets_v_minus_A_times_w(bs,s,v,t+bs*vi[m]); 1479 v += bs2; 1480 } 1481 } 1482 1483 /* backward solve the upper triangular */ 1484 ls = a->solve_work + A->cmap->n; 1485 for (i=n-1; i>=0; i--) { 1486 v = aa + bs2*(adiag[i+1]+1); 1487 vi = aj + adiag[i+1]+1; 1488 nz = adiag[i] - adiag[i+1] - 1; 1489 ierr = PetscMemcpy(ls,t+i*bs,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1490 for (m=0; m<nz; m++) { 1491 PetscKernel_v_gets_v_minus_A_times_w(bs,ls,v,t+bs*vi[m]); 1492 v += bs2; 1493 } 1494 PetscKernel_w_gets_A_times_v(bs,ls,v,t+i*bs); /* *inv(diagonal[i]) */ 1495 ierr = PetscMemcpy(x + bs*c[i],t+i*bs,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1496 } 1497 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1498 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1499 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1500 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1501 ierr = PetscLogFlops(2.0*(a->bs2)*(a->nz) - A->rmap->bs*A->cmap->n);CHKERRQ(ierr); 1502 PetscFunctionReturn(0); 1503 } 1504 1505 #undef __FUNCT__ 1506 #define __FUNCT__ "MatBlockAbs_privat" 1507 /* 1508 For each block in an block array saves the largest absolute value in the block into another array 1509 */ 1510 static PetscErrorCode MatBlockAbs_private(PetscInt nbs,PetscInt bs2,PetscScalar *blockarray,PetscReal *absarray) 1511 { 1512 PetscErrorCode ierr; 1513 PetscInt i,j; 1514 1515 PetscFunctionBegin; 1516 ierr = PetscMemzero(absarray,(nbs+1)*sizeof(PetscReal));CHKERRQ(ierr); 1517 for (i=0; i<nbs; i++) { 1518 for (j=0; j<bs2; j++) { 1519 if (absarray[i] < PetscAbsScalar(blockarray[i*nbs+j])) absarray[i] = PetscAbsScalar(blockarray[i*nbs+j]); 1520 } 1521 } 1522 PetscFunctionReturn(0); 1523 } 1524 1525 #undef __FUNCT__ 1526 #define __FUNCT__ "MatILUDTFactor_SeqBAIJ" 1527 /* 1528 This needs to be renamed and called by the regular MatILUFactor_SeqBAIJ when drop tolerance is used 1529 */ 1530 PetscErrorCode MatILUDTFactor_SeqBAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 1531 { 1532 Mat B = *fact; 1533 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*b; 1534 IS isicol; 1535 PetscErrorCode ierr; 1536 const PetscInt *r,*ic; 1537 PetscInt i,mbs=a->mbs,bs=A->rmap->bs,bs2=a->bs2,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 1538 PetscInt *bi,*bj,*bdiag; 1539 1540 PetscInt row,nzi,nzi_bl,nzi_bu,*im,dtcount,nzi_al,nzi_au; 1541 PetscInt nlnk,*lnk; 1542 PetscBT lnkbt; 1543 PetscBool row_identity,icol_identity; 1544 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,*multiplier,*vtmp; 1545 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 1546 1547 PetscReal dt=info->dt; /* shift=info->shiftamount; */ 1548 PetscInt nnz_max; 1549 PetscBool missing; 1550 PetscReal *vtmp_abs; 1551 MatScalar *v_work; 1552 PetscInt *v_pivots; 1553 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 1554 1555 PetscFunctionBegin; 1556 /* ------- symbolic factorization, can be reused ---------*/ 1557 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1558 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1559 adiag=a->diag; 1560 1561 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1562 1563 /* bdiag is location of diagonal in factor */ 1564 ierr = PetscMalloc1(mbs+1,&bdiag);CHKERRQ(ierr); 1565 1566 /* allocate row pointers bi */ 1567 ierr = PetscMalloc1(2*mbs+2,&bi);CHKERRQ(ierr); 1568 1569 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 1570 dtcount = (PetscInt)info->dtcount; 1571 if (dtcount > mbs-1) dtcount = mbs-1; 1572 nnz_max = ai[mbs]+2*mbs*dtcount +2; 1573 /* printf("MatILUDTFactor_SeqBAIJ, bs %d, ai[mbs] %d, nnz_max %d, dtcount %d\n",bs,ai[mbs],nnz_max,dtcount); */ 1574 ierr = PetscMalloc1(nnz_max,&bj);CHKERRQ(ierr); 1575 nnz_max = nnz_max*bs2; 1576 ierr = PetscMalloc1(nnz_max,&ba);CHKERRQ(ierr); 1577 1578 /* put together the new matrix */ 1579 ierr = MatSeqBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 1580 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 1581 1582 b = (Mat_SeqBAIJ*)(B)->data; 1583 b->free_a = PETSC_TRUE; 1584 b->free_ij = PETSC_TRUE; 1585 b->singlemalloc = PETSC_FALSE; 1586 1587 b->a = ba; 1588 b->j = bj; 1589 b->i = bi; 1590 b->diag = bdiag; 1591 b->ilen = 0; 1592 b->imax = 0; 1593 b->row = isrow; 1594 b->col = iscol; 1595 1596 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1597 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1598 1599 b->icol = isicol; 1600 ierr = PetscMalloc1(bs*(mbs+1),&b->solve_work);CHKERRQ(ierr); 1601 ierr = PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1602 b->maxnz = nnz_max/bs2; 1603 1604 (B)->factortype = MAT_FACTOR_ILUDT; 1605 (B)->info.factor_mallocs = 0; 1606 (B)->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)(ai[mbs]*bs2)); 1607 /* ------- end of symbolic factorization ---------*/ 1608 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1609 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1610 1611 /* linked list for storing column indices of the active row */ 1612 nlnk = mbs + 1; 1613 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1614 1615 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 1616 ierr = PetscMalloc2(mbs,&im,mbs,&jtmp);CHKERRQ(ierr); 1617 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 1618 ierr = PetscMalloc2(mbs*bs2,&rtmp,mbs*bs2,&vtmp);CHKERRQ(ierr); 1619 ierr = PetscMalloc1(mbs+1,&vtmp_abs);CHKERRQ(ierr); 1620 ierr = PetscMalloc3(bs,&v_work,bs2,&multiplier,bs,&v_pivots);CHKERRQ(ierr); 1621 1622 allowzeropivot = PetscNot(A->erroriffailure); 1623 bi[0] = 0; 1624 bdiag[0] = (nnz_max/bs2)-1; /* location of diagonal in factor B */ 1625 bi[2*mbs+1] = bdiag[0]+1; /* endof bj and ba array */ 1626 for (i=0; i<mbs; i++) { 1627 /* copy initial fill into linked list */ 1628 nzi = ai[r[i]+1] - ai[r[i]]; 1629 if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1630 nzi_al = adiag[r[i]] - ai[r[i]]; 1631 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 1632 1633 /* load in initial unfactored row */ 1634 ajtmp = aj + ai[r[i]]; 1635 ierr = PetscLLAddPerm(nzi,ajtmp,ic,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1636 ierr = PetscMemzero(rtmp,mbs*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1637 aatmp = a->a + bs2*ai[r[i]]; 1638 for (j=0; j<nzi; j++) { 1639 ierr = PetscMemcpy(rtmp+bs2*ic[ajtmp[j]],aatmp+bs2*j,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1640 } 1641 1642 /* add pivot rows into linked list */ 1643 row = lnk[mbs]; 1644 while (row < i) { 1645 nzi_bl = bi[row+1] - bi[row] + 1; 1646 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 1647 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 1648 nzi += nlnk; 1649 row = lnk[row]; 1650 } 1651 1652 /* copy data from lnk into jtmp, then initialize lnk */ 1653 ierr = PetscLLClean(mbs,mbs,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 1654 1655 /* numerical factorization */ 1656 bjtmp = jtmp; 1657 row = *bjtmp++; /* 1st pivot row */ 1658 1659 while (row < i) { 1660 pc = rtmp + bs2*row; 1661 pv = ba + bs2*bdiag[row]; /* inv(diag) of the pivot row */ 1662 PetscKernel_A_gets_A_times_B(bs,pc,pv,multiplier); /* pc= multiplier = pc*inv(diag[row]) */ 1663 ierr = MatBlockAbs_private(1,bs2,pc,vtmp_abs);CHKERRQ(ierr); 1664 if (vtmp_abs[0] > dt) { /* apply tolerance dropping rule */ 1665 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 1666 pv = ba + bs2*(bdiag[row+1] + 1); 1667 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 1668 for (j=0; j<nz; j++) { 1669 PetscKernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); 1670 } 1671 /* ierr = PetscLogFlops(bslog*(nz+1.0)-bs);CHKERRQ(ierr); */ 1672 } 1673 row = *bjtmp++; 1674 } 1675 1676 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 1677 nzi_bl = 0; j = 0; 1678 while (jtmp[j] < i) { /* L-part. Note: jtmp is sorted */ 1679 ierr = PetscMemcpy(vtmp+bs2*j,rtmp+bs2*jtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 1680 nzi_bl++; j++; 1681 } 1682 nzi_bu = nzi - nzi_bl -1; 1683 1684 while (j < nzi) { /* U-part */ 1685 ierr = PetscMemcpy(vtmp+bs2*j,rtmp+bs2*jtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 1686 j++; 1687 } 1688 1689 ierr = MatBlockAbs_private(nzi,bs2,vtmp,vtmp_abs);CHKERRQ(ierr); 1690 1691 bjtmp = bj + bi[i]; 1692 batmp = ba + bs2*bi[i]; 1693 /* apply level dropping rule to L part */ 1694 ncut = nzi_al + dtcount; 1695 if (ncut < nzi_bl) { 1696 ierr = PetscSortSplitReal(ncut,nzi_bl,vtmp_abs,jtmp);CHKERRQ(ierr); 1697 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 1698 } else { 1699 ncut = nzi_bl; 1700 } 1701 for (j=0; j<ncut; j++) { 1702 bjtmp[j] = jtmp[j]; 1703 ierr = PetscMemcpy(batmp+bs2*j,rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 1704 } 1705 bi[i+1] = bi[i] + ncut; 1706 nzi = ncut + 1; 1707 1708 /* apply level dropping rule to U part */ 1709 ncut = nzi_au + dtcount; 1710 if (ncut < nzi_bu) { 1711 ierr = PetscSortSplitReal(ncut,nzi_bu,vtmp_abs+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 1712 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 1713 } else { 1714 ncut = nzi_bu; 1715 } 1716 nzi += ncut; 1717 1718 /* mark bdiagonal */ 1719 bdiag[i+1] = bdiag[i] - (ncut + 1); 1720 bi[2*mbs - i] = bi[2*mbs - i +1] - (ncut + 1); 1721 1722 bjtmp = bj + bdiag[i]; 1723 batmp = ba + bs2*bdiag[i]; 1724 ierr = PetscMemcpy(batmp,rtmp+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1725 *bjtmp = i; 1726 1727 bjtmp = bj + bdiag[i+1]+1; 1728 batmp = ba + (bdiag[i+1]+1)*bs2; 1729 1730 for (k=0; k<ncut; k++) { 1731 bjtmp[k] = jtmp[nzi_bl+1+k]; 1732 ierr = PetscMemcpy(batmp+bs2*k,rtmp+bs2*bjtmp[k],bs2*sizeof(MatScalar));CHKERRQ(ierr); 1733 } 1734 1735 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 1736 1737 /* invert diagonal block for simplier triangular solves - add shift??? */ 1738 batmp = ba + bs2*bdiag[i]; 1739 1740 ierr = PetscKernel_A_gets_inverse_A(bs,batmp,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 1741 if (zeropivotdetected) B->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1742 } /* for (i=0; i<mbs; i++) */ 1743 ierr = PetscFree3(v_work,multiplier,v_pivots);CHKERRQ(ierr); 1744 1745 /* printf("end of L %d, beginning of U %d\n",bi[mbs],bdiag[mbs]); */ 1746 if (bi[mbs] >= bdiag[mbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[mbs],bdiag[mbs]); 1747 1748 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1749 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1750 1751 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1752 1753 ierr = PetscFree2(im,jtmp);CHKERRQ(ierr); 1754 ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr); 1755 1756 ierr = PetscLogFlops(bs2*B->cmap->n);CHKERRQ(ierr); 1757 b->maxnz = b->nz = bi[mbs] + bdiag[0] - bdiag[mbs]; 1758 1759 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1760 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 1761 if (row_identity && icol_identity) { 1762 B->ops->solve = MatSolve_SeqBAIJ_N_NaturalOrdering; 1763 } else { 1764 B->ops->solve = MatSolve_SeqBAIJ_N; 1765 } 1766 1767 B->ops->solveadd = 0; 1768 B->ops->solvetranspose = 0; 1769 B->ops->solvetransposeadd = 0; 1770 B->ops->matsolve = 0; 1771 B->assembled = PETSC_TRUE; 1772 B->preallocated = PETSC_TRUE; 1773 PetscFunctionReturn(0); 1774 } 1775