1 2 /* 3 Factorization code for BAIJ format. 4 */ 5 #include <../src/mat/impls/baij/seq/baij.h> 6 #include <petsc/private/kernels/blockinvert.h> 7 8 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat B,Mat A,const MatFactorInfo *info) 9 { 10 Mat C =B; 11 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)C->data; 12 IS isrow = b->row,isicol = b->icol; 13 PetscErrorCode ierr; 14 const PetscInt *r,*ic; 15 PetscInt i,j,k,nz,nzL,row,*pj; 16 const PetscInt n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,bs2=a->bs2; 17 const PetscInt *ajtmp,*bjtmp,*bdiag=b->diag; 18 MatScalar *rtmp,*pc,*mwork,*pv; 19 MatScalar *aa=a->a,*v; 20 PetscInt flg; 21 PetscReal shift = info->shiftamount; 22 PetscBool allowzeropivot,zeropivotdetected; 23 24 PetscFunctionBegin; 25 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 26 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 27 allowzeropivot = PetscNot(A->erroriffailure); 28 29 /* generate work space needed by the factorization */ 30 ierr = PetscMalloc2(bs2*n,&rtmp,bs2,&mwork);CHKERRQ(ierr); 31 ierr = PetscMemzero(rtmp,bs2*n*sizeof(MatScalar));CHKERRQ(ierr); 32 33 for (i=0; i<n; i++) { 34 /* zero rtmp */ 35 /* L part */ 36 nz = bi[i+1] - bi[i]; 37 bjtmp = bj + bi[i]; 38 for (j=0; j<nz; j++) { 39 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 40 } 41 42 /* U part */ 43 nz = bdiag[i] - bdiag[i+1]; 44 bjtmp = bj + bdiag[i+1]+1; 45 for (j=0; j<nz; j++) { 46 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 47 } 48 49 /* load in initial (unfactored row) */ 50 nz = ai[r[i]+1] - ai[r[i]]; 51 ajtmp = aj + ai[r[i]]; 52 v = aa + bs2*ai[r[i]]; 53 for (j=0; j<nz; j++) { 54 ierr = PetscMemcpy(rtmp+bs2*ic[ajtmp[j]],v+bs2*j,bs2*sizeof(MatScalar));CHKERRQ(ierr); 55 } 56 57 /* elimination */ 58 bjtmp = bj + bi[i]; 59 nzL = bi[i+1] - bi[i]; 60 for (k=0; k < nzL; k++) { 61 row = bjtmp[k]; 62 pc = rtmp + bs2*row; 63 for (flg=0,j=0; j<bs2; j++) { 64 if (pc[j] != (PetscScalar)0.0) { 65 flg = 1; 66 break; 67 } 68 } 69 if (flg) { 70 pv = b->a + bs2*bdiag[row]; 71 /* PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */ 72 ierr = PetscKernel_A_gets_A_times_B_2(pc,pv,mwork);CHKERRQ(ierr); 73 74 pj = b->j + bdiag[row+1]+1; /* begining of U(row,:) */ 75 pv = b->a + bs2*(bdiag[row+1]+1); 76 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries inU(row,:), excluding diag */ 77 for (j=0; j<nz; j++) { 78 /* PetscKernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); */ 79 /* rtmp+bs2*pj[j] = rtmp+bs2*pj[j] - (*pc)*(pv+bs2*j) */ 80 v = rtmp + 4*pj[j]; 81 ierr = PetscKernel_A_gets_A_minus_B_times_C_2(v,pc,pv);CHKERRQ(ierr); 82 pv += 4; 83 } 84 ierr = PetscLogFlops(16*nz+12);CHKERRQ(ierr); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */ 85 } 86 } 87 88 /* finished row so stick it into b->a */ 89 /* L part */ 90 pv = b->a + bs2*bi[i]; 91 pj = b->j + bi[i]; 92 nz = bi[i+1] - bi[i]; 93 for (j=0; j<nz; j++) { 94 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 95 } 96 97 /* Mark diagonal and invert diagonal for simplier triangular solves */ 98 pv = b->a + bs2*bdiag[i]; 99 pj = b->j + bdiag[i]; 100 ierr = PetscMemcpy(pv,rtmp+bs2*pj[0],bs2*sizeof(MatScalar));CHKERRQ(ierr); 101 102 ierr = PetscKernel_A_gets_inverse_A_2(pv,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 103 if (zeropivotdetected) B->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 104 105 /* U part */ 106 pv = b->a + bs2*(bdiag[i+1]+1); 107 pj = b->j + bdiag[i+1]+1; 108 nz = bdiag[i] - bdiag[i+1] - 1; 109 for (j=0; j<nz; j++) { 110 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 111 } 112 } 113 114 ierr = PetscFree2(rtmp,mwork);CHKERRQ(ierr); 115 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 116 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 117 118 C->ops->solve = MatSolve_SeqBAIJ_2; 119 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2; 120 C->assembled = PETSC_TRUE; 121 122 ierr = PetscLogFlops(1.333333333333*2*2*2*n);CHKERRQ(ierr); /* from inverting diagonal blocks */ 123 PetscFunctionReturn(0); 124 } 125 126 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat B,Mat A,const MatFactorInfo *info) 127 { 128 Mat C =B; 129 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)C->data; 130 PetscErrorCode ierr; 131 PetscInt i,j,k,nz,nzL,row,*pj; 132 const PetscInt n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,bs2=a->bs2; 133 const PetscInt *ajtmp,*bjtmp,*bdiag=b->diag; 134 MatScalar *rtmp,*pc,*mwork,*pv; 135 MatScalar *aa=a->a,*v; 136 PetscInt flg; 137 PetscReal shift = info->shiftamount; 138 PetscBool allowzeropivot,zeropivotdetected; 139 140 PetscFunctionBegin; 141 allowzeropivot = PetscNot(A->erroriffailure); 142 143 /* generate work space needed by the factorization */ 144 ierr = PetscMalloc2(bs2*n,&rtmp,bs2,&mwork);CHKERRQ(ierr); 145 ierr = PetscMemzero(rtmp,bs2*n*sizeof(MatScalar));CHKERRQ(ierr); 146 147 for (i=0; i<n; i++) { 148 /* zero rtmp */ 149 /* L part */ 150 nz = bi[i+1] - bi[i]; 151 bjtmp = bj + bi[i]; 152 for (j=0; j<nz; j++) { 153 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 154 } 155 156 /* U part */ 157 nz = bdiag[i] - bdiag[i+1]; 158 bjtmp = bj + bdiag[i+1]+1; 159 for (j=0; j<nz; j++) { 160 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 161 } 162 163 /* load in initial (unfactored row) */ 164 nz = ai[i+1] - ai[i]; 165 ajtmp = aj + ai[i]; 166 v = aa + bs2*ai[i]; 167 for (j=0; j<nz; j++) { 168 ierr = PetscMemcpy(rtmp+bs2*ajtmp[j],v+bs2*j,bs2*sizeof(MatScalar));CHKERRQ(ierr); 169 } 170 171 /* elimination */ 172 bjtmp = bj + bi[i]; 173 nzL = bi[i+1] - bi[i]; 174 for (k=0; k < nzL; k++) { 175 row = bjtmp[k]; 176 pc = rtmp + bs2*row; 177 for (flg=0,j=0; j<bs2; j++) { 178 if (pc[j]!=(PetscScalar)0.0) { 179 flg = 1; 180 break; 181 } 182 } 183 if (flg) { 184 pv = b->a + bs2*bdiag[row]; 185 /* PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */ 186 ierr = PetscKernel_A_gets_A_times_B_2(pc,pv,mwork);CHKERRQ(ierr); 187 188 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 189 pv = b->a + bs2*(bdiag[row+1]+1); 190 nz = bdiag[row]-bdiag[row+1] - 1; /* num of entries in U(row,:) excluding diag */ 191 for (j=0; j<nz; j++) { 192 /* PetscKernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); */ 193 /* rtmp+bs2*pj[j] = rtmp+bs2*pj[j] - (*pc)*(pv+bs2*j) */ 194 v = rtmp + 4*pj[j]; 195 ierr = PetscKernel_A_gets_A_minus_B_times_C_2(v,pc,pv);CHKERRQ(ierr); 196 pv += 4; 197 } 198 ierr = PetscLogFlops(16*nz+12);CHKERRQ(ierr); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */ 199 } 200 } 201 202 /* finished row so stick it into b->a */ 203 /* L part */ 204 pv = b->a + bs2*bi[i]; 205 pj = b->j + bi[i]; 206 nz = bi[i+1] - bi[i]; 207 for (j=0; j<nz; j++) { 208 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 209 } 210 211 /* Mark diagonal and invert diagonal for simplier triangular solves */ 212 pv = b->a + bs2*bdiag[i]; 213 pj = b->j + bdiag[i]; 214 ierr = PetscMemcpy(pv,rtmp+bs2*pj[0],bs2*sizeof(MatScalar));CHKERRQ(ierr); 215 216 ierr = PetscKernel_A_gets_inverse_A_2(pv,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 217 if (zeropivotdetected) B->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 218 219 /* U part */ 220 /* 221 pv = b->a + bs2*bi[2*n-i]; 222 pj = b->j + bi[2*n-i]; 223 nz = bi[2*n-i+1] - bi[2*n-i] - 1; 224 */ 225 pv = b->a + bs2*(bdiag[i+1]+1); 226 pj = b->j + bdiag[i+1]+1; 227 nz = bdiag[i] - bdiag[i+1] - 1; 228 for (j=0; j<nz; j++) { 229 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 230 } 231 } 232 ierr = PetscFree2(rtmp,mwork);CHKERRQ(ierr); 233 234 C->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering; 235 C->ops->forwardsolve = MatForwardSolve_SeqBAIJ_2_NaturalOrdering; 236 C->ops->backwardsolve = MatBackwardSolve_SeqBAIJ_2_NaturalOrdering; 237 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering; 238 C->assembled = PETSC_TRUE; 239 240 ierr = PetscLogFlops(1.333333333333*2*2*2*n);CHKERRQ(ierr); /* from inverting diagonal blocks */ 241 PetscFunctionReturn(0); 242 } 243 244 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_inplace(Mat B,Mat A,const MatFactorInfo *info) 245 { 246 Mat C = B; 247 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)C->data; 248 IS isrow = b->row,isicol = b->icol; 249 PetscErrorCode ierr; 250 const PetscInt *r,*ic; 251 PetscInt i,j,n = a->mbs,*bi = b->i,*bj = b->j; 252 PetscInt *ajtmpold,*ajtmp,nz,row; 253 PetscInt *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj; 254 MatScalar *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4; 255 MatScalar p1,p2,p3,p4; 256 MatScalar *ba = b->a,*aa = a->a; 257 PetscReal shift = info->shiftamount; 258 PetscBool allowzeropivot,zeropivotdetected; 259 260 PetscFunctionBegin; 261 allowzeropivot = PetscNot(A->erroriffailure); 262 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 263 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 264 ierr = PetscMalloc1(4*(n+1),&rtmp);CHKERRQ(ierr); 265 266 for (i=0; i<n; i++) { 267 nz = bi[i+1] - bi[i]; 268 ajtmp = bj + bi[i]; 269 for (j=0; j<nz; j++) { 270 x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0; 271 } 272 /* load in initial (unfactored row) */ 273 idx = r[i]; 274 nz = ai[idx+1] - ai[idx]; 275 ajtmpold = aj + ai[idx]; 276 v = aa + 4*ai[idx]; 277 for (j=0; j<nz; j++) { 278 x = rtmp+4*ic[ajtmpold[j]]; 279 x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 280 v += 4; 281 } 282 row = *ajtmp++; 283 while (row < i) { 284 pc = rtmp + 4*row; 285 p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 286 if (p1 != (PetscScalar)0.0 || p2 != (PetscScalar)0.0 || p3 != (PetscScalar)0.0 || p4 != (PetscScalar)0.0) { 287 pv = ba + 4*diag_offset[row]; 288 pj = bj + diag_offset[row] + 1; 289 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 290 pc[0] = m1 = p1*x1 + p3*x2; 291 pc[1] = m2 = p2*x1 + p4*x2; 292 pc[2] = m3 = p1*x3 + p3*x4; 293 pc[3] = m4 = p2*x3 + p4*x4; 294 nz = bi[row+1] - diag_offset[row] - 1; 295 pv += 4; 296 for (j=0; j<nz; j++) { 297 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 298 x = rtmp + 4*pj[j]; 299 x[0] -= m1*x1 + m3*x2; 300 x[1] -= m2*x1 + m4*x2; 301 x[2] -= m1*x3 + m3*x4; 302 x[3] -= m2*x3 + m4*x4; 303 pv += 4; 304 } 305 ierr = PetscLogFlops(16.0*nz+12.0);CHKERRQ(ierr); 306 } 307 row = *ajtmp++; 308 } 309 /* finished row so stick it into b->a */ 310 pv = ba + 4*bi[i]; 311 pj = bj + bi[i]; 312 nz = bi[i+1] - bi[i]; 313 for (j=0; j<nz; j++) { 314 x = rtmp+4*pj[j]; 315 pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 316 pv += 4; 317 } 318 /* invert diagonal block */ 319 w = ba + 4*diag_offset[i]; 320 ierr = PetscKernel_A_gets_inverse_A_2(w,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 321 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 322 } 323 324 ierr = PetscFree(rtmp);CHKERRQ(ierr); 325 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 326 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 327 328 C->ops->solve = MatSolve_SeqBAIJ_2_inplace; 329 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_inplace; 330 C->assembled = PETSC_TRUE; 331 332 ierr = PetscLogFlops(1.333333333333*8*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 333 PetscFunctionReturn(0); 334 } 335 /* 336 Version for when blocks are 2 by 2 Using natural ordering 337 */ 338 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace(Mat C,Mat A,const MatFactorInfo *info) 339 { 340 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)C->data; 341 PetscErrorCode ierr; 342 PetscInt i,j,n = a->mbs,*bi = b->i,*bj = b->j; 343 PetscInt *ajtmpold,*ajtmp,nz,row; 344 PetscInt *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj; 345 MatScalar *pv,*v,*rtmp,*pc,*w,*x; 346 MatScalar p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4; 347 MatScalar *ba = b->a,*aa = a->a; 348 PetscReal shift = info->shiftamount; 349 PetscBool allowzeropivot,zeropivotdetected; 350 351 PetscFunctionBegin; 352 allowzeropivot = PetscNot(A->erroriffailure); 353 ierr = PetscMalloc1(4*(n+1),&rtmp);CHKERRQ(ierr); 354 for (i=0; i<n; i++) { 355 nz = bi[i+1] - bi[i]; 356 ajtmp = bj + bi[i]; 357 for (j=0; j<nz; j++) { 358 x = rtmp+4*ajtmp[j]; 359 x[0] = x[1] = x[2] = x[3] = 0.0; 360 } 361 /* load in initial (unfactored row) */ 362 nz = ai[i+1] - ai[i]; 363 ajtmpold = aj + ai[i]; 364 v = aa + 4*ai[i]; 365 for (j=0; j<nz; j++) { 366 x = rtmp+4*ajtmpold[j]; 367 x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 368 v += 4; 369 } 370 row = *ajtmp++; 371 while (row < i) { 372 pc = rtmp + 4*row; 373 p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 374 if (p1 != (PetscScalar)0.0 || p2 != (PetscScalar)0.0 || p3 != (PetscScalar)0.0 || p4 != (PetscScalar)0.0) { 375 pv = ba + 4*diag_offset[row]; 376 pj = bj + diag_offset[row] + 1; 377 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 378 pc[0] = m1 = p1*x1 + p3*x2; 379 pc[1] = m2 = p2*x1 + p4*x2; 380 pc[2] = m3 = p1*x3 + p3*x4; 381 pc[3] = m4 = p2*x3 + p4*x4; 382 nz = bi[row+1] - diag_offset[row] - 1; 383 pv += 4; 384 for (j=0; j<nz; j++) { 385 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 386 x = rtmp + 4*pj[j]; 387 x[0] -= m1*x1 + m3*x2; 388 x[1] -= m2*x1 + m4*x2; 389 x[2] -= m1*x3 + m3*x4; 390 x[3] -= m2*x3 + m4*x4; 391 pv += 4; 392 } 393 ierr = PetscLogFlops(16.0*nz+12.0);CHKERRQ(ierr); 394 } 395 row = *ajtmp++; 396 } 397 /* finished row so stick it into b->a */ 398 pv = ba + 4*bi[i]; 399 pj = bj + bi[i]; 400 nz = bi[i+1] - bi[i]; 401 for (j=0; j<nz; j++) { 402 x = rtmp+4*pj[j]; 403 pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 404 /* 405 printf(" col %d:",pj[j]); 406 PetscInt j1; 407 for (j1=0; j1<4; j1++) printf(" %g,",*(pv+j1)); 408 printf("\n"); 409 */ 410 pv += 4; 411 } 412 /* invert diagonal block */ 413 w = ba + 4*diag_offset[i]; 414 ierr = PetscKernel_A_gets_inverse_A_2(w,shift, allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 415 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 416 } 417 418 ierr = PetscFree(rtmp);CHKERRQ(ierr); 419 420 C->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering_inplace; 421 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering_inplace; 422 C->assembled = PETSC_TRUE; 423 424 ierr = PetscLogFlops(1.333333333333*8*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 425 PetscFunctionReturn(0); 426 } 427 428 /* ----------------------------------------------------------- */ 429 /* 430 Version for when blocks are 1 by 1. 431 */ 432 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat B,Mat A,const MatFactorInfo *info) 433 { 434 Mat C =B; 435 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)C->data; 436 IS isrow = b->row,isicol = b->icol; 437 PetscErrorCode ierr; 438 const PetscInt *r,*ic,*ics; 439 const PetscInt n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag; 440 PetscInt i,j,k,nz,nzL,row,*pj; 441 const PetscInt *ajtmp,*bjtmp; 442 MatScalar *rtmp,*pc,multiplier,*pv; 443 const MatScalar *aa=a->a,*v; 444 PetscBool row_identity,col_identity; 445 FactorShiftCtx sctx; 446 const PetscInt *ddiag; 447 PetscReal rs; 448 MatScalar d; 449 450 PetscFunctionBegin; 451 /* MatPivotSetUp(): initialize shift context sctx */ 452 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 453 454 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 455 ddiag = a->diag; 456 sctx.shift_top = info->zeropivot; 457 for (i=0; i<n; i++) { 458 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 459 d = (aa)[ddiag[i]]; 460 rs = -PetscAbsScalar(d) - PetscRealPart(d); 461 v = aa+ai[i]; 462 nz = ai[i+1] - ai[i]; 463 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 464 if (rs>sctx.shift_top) sctx.shift_top = rs; 465 } 466 sctx.shift_top *= 1.1; 467 sctx.nshift_max = 5; 468 sctx.shift_lo = 0.; 469 sctx.shift_hi = 1.; 470 } 471 472 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 473 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 474 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 475 ics = ic; 476 477 do { 478 sctx.newshift = PETSC_FALSE; 479 for (i=0; i<n; i++) { 480 /* zero rtmp */ 481 /* L part */ 482 nz = bi[i+1] - bi[i]; 483 bjtmp = bj + bi[i]; 484 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 485 486 /* U part */ 487 nz = bdiag[i]-bdiag[i+1]; 488 bjtmp = bj + bdiag[i+1]+1; 489 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 490 491 /* load in initial (unfactored row) */ 492 nz = ai[r[i]+1] - ai[r[i]]; 493 ajtmp = aj + ai[r[i]]; 494 v = aa + ai[r[i]]; 495 for (j=0; j<nz; j++) rtmp[ics[ajtmp[j]]] = v[j]; 496 497 /* ZeropivotApply() */ 498 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 499 500 /* elimination */ 501 bjtmp = bj + bi[i]; 502 row = *bjtmp++; 503 nzL = bi[i+1] - bi[i]; 504 for (k=0; k < nzL; k++) { 505 pc = rtmp + row; 506 if (*pc != (PetscScalar)0.0) { 507 pv = b->a + bdiag[row]; 508 multiplier = *pc * (*pv); 509 *pc = multiplier; 510 511 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 512 pv = b->a + bdiag[row+1]+1; 513 nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */ 514 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 515 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 516 } 517 row = *bjtmp++; 518 } 519 520 /* finished row so stick it into b->a */ 521 rs = 0.0; 522 /* L part */ 523 pv = b->a + bi[i]; 524 pj = b->j + bi[i]; 525 nz = bi[i+1] - bi[i]; 526 for (j=0; j<nz; j++) { 527 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 528 } 529 530 /* U part */ 531 pv = b->a + bdiag[i+1]+1; 532 pj = b->j + bdiag[i+1]+1; 533 nz = bdiag[i] - bdiag[i+1]-1; 534 for (j=0; j<nz; j++) { 535 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 536 } 537 538 sctx.rs = rs; 539 sctx.pv = rtmp[i]; 540 ierr = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr); 541 if (sctx.newshift) break; /* break for-loop */ 542 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 543 544 /* Mark diagonal and invert diagonal for simplier triangular solves */ 545 pv = b->a + bdiag[i]; 546 *pv = (PetscScalar)1.0/rtmp[i]; 547 548 } /* endof for (i=0; i<n; i++) { */ 549 550 /* MatPivotRefine() */ 551 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 552 /* 553 * if no shift in this attempt & shifting & started shifting & can refine, 554 * then try lower shift 555 */ 556 sctx.shift_hi = sctx.shift_fraction; 557 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 558 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 559 sctx.newshift = PETSC_TRUE; 560 sctx.nshift++; 561 } 562 } while (sctx.newshift); 563 564 ierr = PetscFree(rtmp);CHKERRQ(ierr); 565 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 566 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 567 568 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 569 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 570 if (row_identity && col_identity) { 571 C->ops->solve = MatSolve_SeqBAIJ_1_NaturalOrdering; 572 C->ops->forwardsolve = MatForwardSolve_SeqBAIJ_1_NaturalOrdering; 573 C->ops->backwardsolve = MatBackwardSolve_SeqBAIJ_1_NaturalOrdering; 574 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_NaturalOrdering; 575 } else { 576 C->ops->solve = MatSolve_SeqBAIJ_1; 577 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1; 578 } 579 C->assembled = PETSC_TRUE; 580 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 581 582 /* MatShiftView(A,info,&sctx) */ 583 if (sctx.nshift) { 584 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 585 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 586 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 587 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 588 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 589 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr); 590 } 591 } 592 PetscFunctionReturn(0); 593 } 594 595 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1_inplace(Mat C,Mat A,const MatFactorInfo *info) 596 { 597 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)C->data; 598 IS isrow = b->row,isicol = b->icol; 599 PetscErrorCode ierr; 600 const PetscInt *r,*ic; 601 PetscInt i,j,n = a->mbs,*bi = b->i,*bj = b->j; 602 PetscInt *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j; 603 PetscInt *diag_offset = b->diag,diag,*pj; 604 MatScalar *pv,*v,*rtmp,multiplier,*pc; 605 MatScalar *ba = b->a,*aa = a->a; 606 PetscBool row_identity, col_identity; 607 608 PetscFunctionBegin; 609 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 610 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 611 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 612 613 for (i=0; i<n; i++) { 614 nz = bi[i+1] - bi[i]; 615 ajtmp = bj + bi[i]; 616 for (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0; 617 618 /* load in initial (unfactored row) */ 619 nz = ai[r[i]+1] - ai[r[i]]; 620 ajtmpold = aj + ai[r[i]]; 621 v = aa + ai[r[i]]; 622 for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] = v[j]; 623 624 row = *ajtmp++; 625 while (row < i) { 626 pc = rtmp + row; 627 if (*pc != 0.0) { 628 pv = ba + diag_offset[row]; 629 pj = bj + diag_offset[row] + 1; 630 multiplier = *pc * *pv++; 631 *pc = multiplier; 632 nz = bi[row+1] - diag_offset[row] - 1; 633 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 634 ierr = PetscLogFlops(1.0+2.0*nz);CHKERRQ(ierr); 635 } 636 row = *ajtmp++; 637 } 638 /* finished row so stick it into b->a */ 639 pv = ba + bi[i]; 640 pj = bj + bi[i]; 641 nz = bi[i+1] - bi[i]; 642 for (j=0; j<nz; j++) pv[j] = rtmp[pj[j]]; 643 diag = diag_offset[i] - bi[i]; 644 /* check pivot entry for current row */ 645 if (pv[diag] == 0.0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot: row in original ordering %D in permuted ordering %D",r[i],i); 646 pv[diag] = 1.0/pv[diag]; 647 } 648 649 ierr = PetscFree(rtmp);CHKERRQ(ierr); 650 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 651 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 652 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 653 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 654 if (row_identity && col_identity) { 655 C->ops->solve = MatSolve_SeqBAIJ_1_NaturalOrdering_inplace; 656 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_NaturalOrdering_inplace; 657 } else { 658 C->ops->solve = MatSolve_SeqBAIJ_1_inplace; 659 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_inplace; 660 } 661 C->assembled = PETSC_TRUE; 662 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 663 PetscFunctionReturn(0); 664 } 665 666 PETSC_INTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat A,const MatFactorType ftype,Mat *B) 667 { 668 PetscInt n = A->rmap->n; 669 PetscErrorCode ierr; 670 671 PetscFunctionBegin; 672 #if defined(PETSC_USE_COMPLEX) 673 if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported"); 674 #endif 675 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 676 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 677 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 678 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 679 680 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqBAIJ; 681 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqBAIJ; 682 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 683 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 684 ierr = MatSeqSBAIJSetPreallocation(*B,A->rmap->bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 685 686 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqBAIJ; 687 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqBAIJ; 688 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 689 (*B)->factortype = ftype; 690 691 ierr = PetscFree((*B)->solvertype);CHKERRQ(ierr); 692 ierr = PetscStrallocpy(MATSOLVERPETSC,&(*B)->solvertype);CHKERRQ(ierr); 693 PetscFunctionReturn(0); 694 } 695 696 /* ----------------------------------------------------------- */ 697 PetscErrorCode MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 698 { 699 PetscErrorCode ierr; 700 Mat C; 701 702 PetscFunctionBegin; 703 ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 704 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 705 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 706 707 A->ops->solve = C->ops->solve; 708 A->ops->solvetranspose = C->ops->solvetranspose; 709 710 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 711 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqBAIJ*)(A->data))->icol);CHKERRQ(ierr); 712 PetscFunctionReturn(0); 713 } 714 715 #include <../src/mat/impls/sbaij/seq/sbaij.h> 716 PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N(Mat C,Mat A,const MatFactorInfo *info) 717 { 718 PetscErrorCode ierr; 719 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 720 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 721 IS ip=b->row; 722 const PetscInt *rip; 723 PetscInt i,j,mbs=a->mbs,bs=A->rmap->bs,*bi=b->i,*bj=b->j,*bcol; 724 PetscInt *ai=a->i,*aj=a->j; 725 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 726 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 727 PetscReal rs; 728 FactorShiftCtx sctx; 729 730 PetscFunctionBegin; 731 if (bs > 1) { /* convert A to a SBAIJ matrix and apply Cholesky factorization from it */ 732 if (!a->sbaijMat) { 733 ierr = MatConvert(A,MATSEQSBAIJ,MAT_INITIAL_MATRIX,&a->sbaijMat);CHKERRQ(ierr); 734 } 735 ierr = (a->sbaijMat)->ops->choleskyfactornumeric(C,a->sbaijMat,info);CHKERRQ(ierr); 736 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 /* MatPivotSetUp(): initialize shift context sctx */ 741 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 742 743 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 744 ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);CHKERRQ(ierr); 745 746 sctx.shift_amount = 0.; 747 sctx.nshift = 0; 748 do { 749 sctx.newshift = PETSC_FALSE; 750 for (i=0; i<mbs; i++) { 751 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 752 } 753 754 for (k = 0; k<mbs; k++) { 755 bval = ba + bi[k]; 756 /* initialize k-th row by the perm[k]-th row of A */ 757 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 758 for (j = jmin; j < jmax; j++) { 759 col = rip[aj[j]]; 760 if (col >= k) { /* only take upper triangular entry */ 761 rtmp[col] = aa[j]; 762 *bval++ = 0.0; /* for in-place factorization */ 763 } 764 } 765 766 /* shift the diagonal of the matrix */ 767 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 768 769 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 770 dk = rtmp[k]; 771 i = jl[k]; /* first row to be added to k_th row */ 772 773 while (i < k) { 774 nexti = jl[i]; /* next row to be added to k_th row */ 775 776 /* compute multiplier, update diag(k) and U(i,k) */ 777 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 778 uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */ 779 dk += uikdi*ba[ili]; 780 ba[ili] = uikdi; /* -U(i,k) */ 781 782 /* add multiple of row i to k-th row */ 783 jmin = ili + 1; jmax = bi[i+1]; 784 if (jmin < jmax) { 785 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 786 /* update il and jl for row i */ 787 il[i] = jmin; 788 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 789 } 790 i = nexti; 791 } 792 793 /* shift the diagonals when zero pivot is detected */ 794 /* compute rs=sum of abs(off-diagonal) */ 795 rs = 0.0; 796 jmin = bi[k]+1; 797 nz = bi[k+1] - jmin; 798 if (nz) { 799 bcol = bj + jmin; 800 while (nz--) { 801 rs += PetscAbsScalar(rtmp[*bcol]); 802 bcol++; 803 } 804 } 805 806 sctx.rs = rs; 807 sctx.pv = dk; 808 ierr = MatPivotCheck(C,A,info,&sctx,k);CHKERRQ(ierr); 809 if (sctx.newshift) break; 810 dk = sctx.pv; 811 812 /* copy data into U(k,:) */ 813 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 814 jmin = bi[k]+1; jmax = bi[k+1]; 815 if (jmin < jmax) { 816 for (j=jmin; j<jmax; j++) { 817 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 818 } 819 /* add the k-th row into il and jl */ 820 il[k] = jmin; 821 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 822 } 823 } 824 } while (sctx.newshift); 825 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 826 827 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 828 829 C->assembled = PETSC_TRUE; 830 C->preallocated = PETSC_TRUE; 831 832 ierr = PetscLogFlops(C->rmap->N);CHKERRQ(ierr); 833 if (sctx.nshift) { 834 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 835 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 836 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 837 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 838 } 839 } 840 PetscFunctionReturn(0); 841 } 842 843 PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info) 844 { 845 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 846 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 847 PetscErrorCode ierr; 848 PetscInt i,j,am=a->mbs; 849 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 850 PetscInt k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz; 851 MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval; 852 PetscReal rs; 853 FactorShiftCtx sctx; 854 855 PetscFunctionBegin; 856 /* MatPivotSetUp(): initialize shift context sctx */ 857 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 858 859 ierr = PetscMalloc3(am,&rtmp,am,&il,am,&jl);CHKERRQ(ierr); 860 861 do { 862 sctx.newshift = PETSC_FALSE; 863 for (i=0; i<am; i++) { 864 rtmp[i] = 0.0; jl[i] = am; il[0] = 0; 865 } 866 867 for (k = 0; k<am; k++) { 868 /* initialize k-th row with elements nonzero in row perm(k) of A */ 869 nz = ai[k+1] - ai[k]; 870 acol = aj + ai[k]; 871 aval = aa + ai[k]; 872 bval = ba + bi[k]; 873 while (nz--) { 874 if (*acol < k) { /* skip lower triangular entries */ 875 acol++; aval++; 876 } else { 877 rtmp[*acol++] = *aval++; 878 *bval++ = 0.0; /* for in-place factorization */ 879 } 880 } 881 882 /* shift the diagonal of the matrix */ 883 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 884 885 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 886 dk = rtmp[k]; 887 i = jl[k]; /* first row to be added to k_th row */ 888 889 while (i < k) { 890 nexti = jl[i]; /* next row to be added to k_th row */ 891 /* compute multiplier, update D(k) and U(i,k) */ 892 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 893 uikdi = -ba[ili]*ba[bi[i]]; 894 dk += uikdi*ba[ili]; 895 ba[ili] = uikdi; /* -U(i,k) */ 896 897 /* add multiple of row i to k-th row ... */ 898 jmin = ili + 1; 899 nz = bi[i+1] - jmin; 900 if (nz > 0) { 901 bcol = bj + jmin; 902 bval = ba + jmin; 903 while (nz--) rtmp[*bcol++] += uikdi*(*bval++); 904 /* update il and jl for i-th row */ 905 il[i] = jmin; 906 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 907 } 908 i = nexti; 909 } 910 911 /* shift the diagonals when zero pivot is detected */ 912 /* compute rs=sum of abs(off-diagonal) */ 913 rs = 0.0; 914 jmin = bi[k]+1; 915 nz = bi[k+1] - jmin; 916 if (nz) { 917 bcol = bj + jmin; 918 while (nz--) { 919 rs += PetscAbsScalar(rtmp[*bcol]); 920 bcol++; 921 } 922 } 923 924 sctx.rs = rs; 925 sctx.pv = dk; 926 ierr = MatPivotCheck(C,A,info,&sctx,k);CHKERRQ(ierr); 927 if (sctx.newshift) break; /* sctx.shift_amount is updated */ 928 dk = sctx.pv; 929 930 /* copy data into U(k,:) */ 931 ba[bi[k]] = 1.0/dk; 932 jmin = bi[k]+1; 933 nz = bi[k+1] - jmin; 934 if (nz) { 935 bcol = bj + jmin; 936 bval = ba + jmin; 937 while (nz--) { 938 *bval++ = rtmp[*bcol]; 939 rtmp[*bcol++] = 0.0; 940 } 941 /* add k-th row into il and jl */ 942 il[k] = jmin; 943 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 944 } 945 } 946 } while (sctx.newshift); 947 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 948 949 C->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 950 C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 951 C->assembled = PETSC_TRUE; 952 C->preallocated = PETSC_TRUE; 953 954 ierr = PetscLogFlops(C->rmap->N);CHKERRQ(ierr); 955 if (sctx.nshift) { 956 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 957 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 958 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 959 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 960 } 961 } 962 PetscFunctionReturn(0); 963 } 964 965 #include <petscbt.h> 966 #include <../src/mat/utils/freespace.h> 967 PetscErrorCode MatICCFactorSymbolic_SeqBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 968 { 969 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 970 Mat_SeqSBAIJ *b; 971 Mat B; 972 PetscErrorCode ierr; 973 PetscBool perm_identity,missing; 974 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=a->mbs,bs=A->rmap->bs,*ui; 975 const PetscInt *rip; 976 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 977 PetscInt nlnk,*lnk,*lnk_lvl=NULL,ncols,ncols_upper,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr; 978 PetscReal fill =info->fill,levels=info->levels; 979 PetscFreeSpaceList free_space =NULL,current_space=NULL; 980 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 981 PetscBT lnkbt; 982 983 PetscFunctionBegin; 984 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 985 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 986 987 if (bs > 1) { 988 if (!a->sbaijMat) { 989 ierr = MatConvert(A,MATSEQSBAIJ,MAT_INITIAL_MATRIX,&a->sbaijMat);CHKERRQ(ierr); 990 } 991 (fact)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqSBAIJ; /* undue the change made in MatGetFactor_seqbaij_petsc */ 992 993 ierr = MatICCFactorSymbolic(fact,a->sbaijMat,perm,info);CHKERRQ(ierr); 994 PetscFunctionReturn(0); 995 } 996 997 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 998 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 999 1000 /* special case that simply copies fill pattern */ 1001 if (!levels && perm_identity) { 1002 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 1003 for (i=0; i<am; i++) ui[i] = ai[i+1] - a->diag[i]; /* ui: rowlengths - changes when !perm_identity */ 1004 B = fact; 1005 ierr = MatSeqSBAIJSetPreallocation(B,1,0,ui);CHKERRQ(ierr); 1006 1007 1008 b = (Mat_SeqSBAIJ*)B->data; 1009 uj = b->j; 1010 for (i=0; i<am; i++) { 1011 aj = a->j + a->diag[i]; 1012 for (j=0; j<ui[i]; j++) *uj++ = *aj++; 1013 b->ilen[i] = ui[i]; 1014 } 1015 ierr = PetscFree(ui);CHKERRQ(ierr); 1016 1017 B->factortype = MAT_FACTOR_NONE; 1018 1019 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1020 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1021 B->factortype = MAT_FACTOR_ICC; 1022 1023 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 1024 PetscFunctionReturn(0); 1025 } 1026 1027 /* initialization */ 1028 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 1029 ui[0] = 0; 1030 ierr = PetscMalloc1(2*am+1,&cols_lvl);CHKERRQ(ierr); 1031 1032 /* jl: linked list for storing indices of the pivot rows 1033 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1034 ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&il,am,&jl);CHKERRQ(ierr); 1035 for (i=0; i<am; i++) { 1036 jl[i] = am; il[i] = 0; 1037 } 1038 1039 /* create and initialize a linked list for storing column indices of the active row k */ 1040 nlnk = am + 1; 1041 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1042 1043 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 1044 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am]/2,am/2)),&free_space);CHKERRQ(ierr); 1045 1046 current_space = free_space; 1047 1048 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am]/2,am/2)),&free_space_lvl);CHKERRQ(ierr); 1049 current_space_lvl = free_space_lvl; 1050 1051 for (k=0; k<am; k++) { /* for each active row k */ 1052 /* initialize lnk by the column indices of row rip[k] of A */ 1053 nzk = 0; 1054 ncols = ai[rip[k]+1] - ai[rip[k]]; 1055 ncols_upper = 0; 1056 cols = cols_lvl + am; 1057 for (j=0; j<ncols; j++) { 1058 i = rip[*(aj + ai[rip[k]] + j)]; 1059 if (i >= k) { /* only take upper triangular entry */ 1060 cols[ncols_upper] = i; 1061 cols_lvl[ncols_upper] = -1; /* initialize level for nonzero entries */ 1062 ncols_upper++; 1063 } 1064 } 1065 ierr = PetscIncompleteLLAdd(ncols_upper,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1066 nzk += nlnk; 1067 1068 /* update lnk by computing fill-in for each pivot row to be merged in */ 1069 prow = jl[k]; /* 1st pivot row */ 1070 1071 while (prow < k) { 1072 nextprow = jl[prow]; 1073 1074 /* merge prow into k-th row */ 1075 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1076 jmax = ui[prow+1]; 1077 ncols = jmax-jmin; 1078 i = jmin - ui[prow]; 1079 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1080 for (j=0; j<ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j); 1081 ierr = PetscIncompleteLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1082 nzk += nlnk; 1083 1084 /* update il and jl for prow */ 1085 if (jmin < jmax) { 1086 il[prow] = jmin; 1087 1088 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1089 } 1090 prow = nextprow; 1091 } 1092 1093 /* if free space is not available, make more free space */ 1094 if (current_space->local_remaining<nzk) { 1095 i = am - k + 1; /* num of unfactored rows */ 1096 i = PetscMin(PetscIntMultTruncate(i,nzk), PetscIntMultTruncate(i,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1097 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1098 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1099 reallocs++; 1100 } 1101 1102 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1103 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1104 1105 /* add the k-th row into il and jl */ 1106 if (nzk-1 > 0) { 1107 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1108 jl[k] = jl[i]; jl[i] = k; 1109 il[k] = ui[k] + 1; 1110 } 1111 uj_ptr[k] = current_space->array; 1112 uj_lvl_ptr[k] = current_space_lvl->array; 1113 1114 current_space->array += nzk; 1115 current_space->local_used += nzk; 1116 current_space->local_remaining -= nzk; 1117 1118 current_space_lvl->array += nzk; 1119 current_space_lvl->local_used += nzk; 1120 current_space_lvl->local_remaining -= nzk; 1121 1122 ui[k+1] = ui[k] + nzk; 1123 } 1124 1125 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1126 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,il,jl);CHKERRQ(ierr); 1127 ierr = PetscFree(cols_lvl);CHKERRQ(ierr); 1128 1129 /* copy free_space into uj and free free_space; set uj in new datastructure; */ 1130 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 1131 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1132 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1133 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1134 1135 /* put together the new matrix in MATSEQSBAIJ format */ 1136 B = fact; 1137 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 1138 1139 b = (Mat_SeqSBAIJ*)B->data; 1140 b->singlemalloc = PETSC_FALSE; 1141 b->free_a = PETSC_TRUE; 1142 b->free_ij = PETSC_TRUE; 1143 1144 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 1145 1146 b->j = uj; 1147 b->i = ui; 1148 b->diag = 0; 1149 b->ilen = 0; 1150 b->imax = 0; 1151 b->row = perm; 1152 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1153 1154 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1155 1156 b->icol = perm; 1157 1158 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1159 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 1160 ierr = PetscLogObjectMemory((PetscObject)B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1161 1162 b->maxnz = b->nz = ui[am]; 1163 1164 B->info.factor_mallocs = reallocs; 1165 B->info.fill_ratio_given = fill; 1166 if (ai[am] != 0.) { 1167 /* nonzeros in lower triangular part of A (includign diagonals)= (ai[am]+am)/2 */ 1168 B->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 1169 } else { 1170 B->info.fill_ratio_needed = 0.0; 1171 } 1172 #if defined(PETSC_USE_INFO) 1173 if (ai[am] != 0) { 1174 PetscReal af = B->info.fill_ratio_needed; 1175 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 1176 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 1177 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 1178 } else { 1179 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1180 } 1181 #endif 1182 if (perm_identity) { 1183 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1184 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1185 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 1186 } else { 1187 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N; 1188 } 1189 PetscFunctionReturn(0); 1190 } 1191 1192 PetscErrorCode MatCholeskyFactorSymbolic_SeqBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 1193 { 1194 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1195 Mat_SeqSBAIJ *b; 1196 Mat B; 1197 PetscErrorCode ierr; 1198 PetscBool perm_identity,missing; 1199 PetscReal fill = info->fill; 1200 const PetscInt *rip; 1201 PetscInt i,mbs=a->mbs,bs=A->rmap->bs,*ai=a->i,*aj=a->j,reallocs=0,prow; 1202 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1203 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1204 PetscFreeSpaceList free_space=NULL,current_space=NULL; 1205 PetscBT lnkbt; 1206 1207 PetscFunctionBegin; 1208 if (bs > 1) { /* convert to seqsbaij */ 1209 if (!a->sbaijMat) { 1210 ierr = MatConvert(A,MATSEQSBAIJ,MAT_INITIAL_MATRIX,&a->sbaijMat);CHKERRQ(ierr); 1211 } 1212 (fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqSBAIJ; /* undue the change made in MatGetFactor_seqbaij_petsc */ 1213 1214 ierr = MatCholeskyFactorSymbolic(fact,a->sbaijMat,perm,info);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 1217 1218 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1219 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1220 1221 /* check whether perm is the identity mapping */ 1222 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1223 if (!perm_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix reordering is not supported"); 1224 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1225 1226 /* initialization */ 1227 ierr = PetscMalloc1(mbs+1,&ui);CHKERRQ(ierr); 1228 ui[0] = 0; 1229 1230 /* jl: linked list for storing indices of the pivot rows 1231 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 1232 ierr = PetscMalloc4(mbs,&ui_ptr,mbs,&il,mbs,&jl,mbs,&cols);CHKERRQ(ierr); 1233 for (i=0; i<mbs; i++) { 1234 jl[i] = mbs; il[i] = 0; 1235 } 1236 1237 /* create and initialize a linked list for storing column indices of the active row k */ 1238 nlnk = mbs + 1; 1239 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1240 1241 /* initial FreeSpace size is fill* (ai[mbs]+mbs)/2 */ 1242 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[mbs]/2,mbs/2)),&free_space);CHKERRQ(ierr); 1243 1244 current_space = free_space; 1245 1246 for (k=0; k<mbs; k++) { /* for each active row k */ 1247 /* initialize lnk by the column indices of row rip[k] of A */ 1248 nzk = 0; 1249 ncols = ai[rip[k]+1] - ai[rip[k]]; 1250 ncols_upper = 0; 1251 for (j=0; j<ncols; j++) { 1252 i = rip[*(aj + ai[rip[k]] + j)]; 1253 if (i >= k) { /* only take upper triangular entry */ 1254 cols[ncols_upper] = i; 1255 ncols_upper++; 1256 } 1257 } 1258 ierr = PetscLLAdd(ncols_upper,cols,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1259 nzk += nlnk; 1260 1261 /* update lnk by computing fill-in for each pivot row to be merged in */ 1262 prow = jl[k]; /* 1st pivot row */ 1263 1264 while (prow < k) { 1265 nextprow = jl[prow]; 1266 /* merge prow into k-th row */ 1267 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 1268 jmax = ui[prow+1]; 1269 ncols = jmax-jmin; 1270 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 1271 ierr = PetscLLAddSorted(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1272 nzk += nlnk; 1273 1274 /* update il and jl for prow */ 1275 if (jmin < jmax) { 1276 il[prow] = jmin; 1277 j = *uj_ptr; 1278 jl[prow] = jl[j]; 1279 jl[j] = prow; 1280 } 1281 prow = nextprow; 1282 } 1283 1284 /* if free space is not available, make more free space */ 1285 if (current_space->local_remaining<nzk) { 1286 i = mbs - k + 1; /* num of unfactored rows */ 1287 i = PetscMin(PetscIntMultTruncate(i,nzk), PetscIntMultTruncate(i,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1288 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1289 reallocs++; 1290 } 1291 1292 /* copy data into free space, then initialize lnk */ 1293 ierr = PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1294 1295 /* add the k-th row into il and jl */ 1296 if (nzk-1 > 0) { 1297 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 1298 jl[k] = jl[i]; jl[i] = k; 1299 il[k] = ui[k] + 1; 1300 } 1301 ui_ptr[k] = current_space->array; 1302 current_space->array += nzk; 1303 current_space->local_used += nzk; 1304 current_space->local_remaining -= nzk; 1305 1306 ui[k+1] = ui[k] + nzk; 1307 } 1308 1309 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1310 ierr = PetscFree4(ui_ptr,il,jl,cols);CHKERRQ(ierr); 1311 1312 /* copy free_space into uj and free free_space; set uj in new datastructure; */ 1313 ierr = PetscMalloc1(ui[mbs]+1,&uj);CHKERRQ(ierr); 1314 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1315 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1316 1317 /* put together the new matrix in MATSEQSBAIJ format */ 1318 B = fact; 1319 ierr = MatSeqSBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 1320 1321 b = (Mat_SeqSBAIJ*)B->data; 1322 b->singlemalloc = PETSC_FALSE; 1323 b->free_a = PETSC_TRUE; 1324 b->free_ij = PETSC_TRUE; 1325 1326 ierr = PetscMalloc1(ui[mbs]+1,&b->a);CHKERRQ(ierr); 1327 1328 b->j = uj; 1329 b->i = ui; 1330 b->diag = 0; 1331 b->ilen = 0; 1332 b->imax = 0; 1333 b->row = perm; 1334 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1335 1336 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1337 b->icol = perm; 1338 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1339 ierr = PetscMalloc1(mbs+1,&b->solve_work);CHKERRQ(ierr); 1340 ierr = PetscLogObjectMemory((PetscObject)B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1341 b->maxnz = b->nz = ui[mbs]; 1342 1343 B->info.factor_mallocs = reallocs; 1344 B->info.fill_ratio_given = fill; 1345 if (ai[mbs] != 0.) { 1346 /* nonzeros in lower triangular part of A = (ai[mbs]+mbs)/2 */ 1347 B->info.fill_ratio_needed = ((PetscReal)2*ui[mbs])/(ai[mbs]+mbs); 1348 } else { 1349 B->info.fill_ratio_needed = 0.0; 1350 } 1351 #if defined(PETSC_USE_INFO) 1352 if (ai[mbs] != 0.) { 1353 PetscReal af = B->info.fill_ratio_needed; 1354 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 1355 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 1356 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 1357 } else { 1358 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1359 } 1360 #endif 1361 if (perm_identity) { 1362 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering; 1363 } else { 1364 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N; 1365 } 1366 PetscFunctionReturn(0); 1367 } 1368 1369 PetscErrorCode MatSolve_SeqBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx) 1370 { 1371 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1372 PetscErrorCode ierr; 1373 const PetscInt *ai=a->i,*aj=a->j,*adiag=a->diag,*vi; 1374 PetscInt i,k,n=a->mbs; 1375 PetscInt nz,bs=A->rmap->bs,bs2=a->bs2; 1376 const MatScalar *aa=a->a,*v; 1377 PetscScalar *x,*s,*t,*ls; 1378 const PetscScalar *b; 1379 1380 PetscFunctionBegin; 1381 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1382 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1383 t = a->solve_work; 1384 1385 /* forward solve the lower triangular */ 1386 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* copy 1st block of b to t */ 1387 1388 for (i=1; i<n; i++) { 1389 v = aa + bs2*ai[i]; 1390 vi = aj + ai[i]; 1391 nz = ai[i+1] - ai[i]; 1392 s = t + bs*i; 1393 ierr = PetscMemcpy(s,b+bs*i,bs*sizeof(PetscScalar));CHKERRQ(ierr); /* copy i_th block of b to t */ 1394 for (k=0;k<nz;k++) { 1395 PetscKernel_v_gets_v_minus_A_times_w(bs,s,v,t+bs*vi[k]); 1396 v += bs2; 1397 } 1398 } 1399 1400 /* backward solve the upper triangular */ 1401 ls = a->solve_work + A->cmap->n; 1402 for (i=n-1; i>=0; i--) { 1403 v = aa + bs2*(adiag[i+1]+1); 1404 vi = aj + adiag[i+1]+1; 1405 nz = adiag[i] - adiag[i+1]-1; 1406 ierr = PetscMemcpy(ls,t+i*bs,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1407 for (k=0; k<nz; k++) { 1408 PetscKernel_v_gets_v_minus_A_times_w(bs,ls,v,t+bs*vi[k]); 1409 v += bs2; 1410 } 1411 PetscKernel_w_gets_A_times_v(bs,ls,aa+bs2*adiag[i],t+i*bs); /* *inv(diagonal[i]) */ 1412 ierr = PetscMemcpy(x+i*bs,t+i*bs,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1413 } 1414 1415 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1416 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1417 ierr = PetscLogFlops(2.0*(a->bs2)*(a->nz) - A->rmap->bs*A->cmap->n);CHKERRQ(ierr); 1418 PetscFunctionReturn(0); 1419 } 1420 1421 PetscErrorCode MatSolve_SeqBAIJ_N(Mat A,Vec bb,Vec xx) 1422 { 1423 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data; 1424 IS iscol=a->col,isrow=a->row; 1425 PetscErrorCode ierr; 1426 const PetscInt *r,*c,*rout,*cout,*ai=a->i,*aj=a->j,*adiag=a->diag,*vi; 1427 PetscInt i,m,n=a->mbs; 1428 PetscInt nz,bs=A->rmap->bs,bs2=a->bs2; 1429 const MatScalar *aa=a->a,*v; 1430 PetscScalar *x,*s,*t,*ls; 1431 const PetscScalar *b; 1432 1433 PetscFunctionBegin; 1434 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1435 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1436 t = a->solve_work; 1437 1438 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1439 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1440 1441 /* forward solve the lower triangular */ 1442 ierr = PetscMemcpy(t,b+bs*r[0],bs*sizeof(PetscScalar));CHKERRQ(ierr); 1443 for (i=1; i<n; i++) { 1444 v = aa + bs2*ai[i]; 1445 vi = aj + ai[i]; 1446 nz = ai[i+1] - ai[i]; 1447 s = t + bs*i; 1448 ierr = PetscMemcpy(s,b+bs*r[i],bs*sizeof(PetscScalar));CHKERRQ(ierr); 1449 for (m=0; m<nz; m++) { 1450 PetscKernel_v_gets_v_minus_A_times_w(bs,s,v,t+bs*vi[m]); 1451 v += bs2; 1452 } 1453 } 1454 1455 /* backward solve the upper triangular */ 1456 ls = a->solve_work + A->cmap->n; 1457 for (i=n-1; i>=0; i--) { 1458 v = aa + bs2*(adiag[i+1]+1); 1459 vi = aj + adiag[i+1]+1; 1460 nz = adiag[i] - adiag[i+1] - 1; 1461 ierr = PetscMemcpy(ls,t+i*bs,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1462 for (m=0; m<nz; m++) { 1463 PetscKernel_v_gets_v_minus_A_times_w(bs,ls,v,t+bs*vi[m]); 1464 v += bs2; 1465 } 1466 PetscKernel_w_gets_A_times_v(bs,ls,v,t+i*bs); /* *inv(diagonal[i]) */ 1467 ierr = PetscMemcpy(x + bs*c[i],t+i*bs,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1468 } 1469 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1470 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1471 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1472 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1473 ierr = PetscLogFlops(2.0*(a->bs2)*(a->nz) - A->rmap->bs*A->cmap->n);CHKERRQ(ierr); 1474 PetscFunctionReturn(0); 1475 } 1476 1477 /* 1478 For each block in an block array saves the largest absolute value in the block into another array 1479 */ 1480 static PetscErrorCode MatBlockAbs_private(PetscInt nbs,PetscInt bs2,PetscScalar *blockarray,PetscReal *absarray) 1481 { 1482 PetscErrorCode ierr; 1483 PetscInt i,j; 1484 1485 PetscFunctionBegin; 1486 ierr = PetscMemzero(absarray,(nbs+1)*sizeof(PetscReal));CHKERRQ(ierr); 1487 for (i=0; i<nbs; i++) { 1488 for (j=0; j<bs2; j++) { 1489 if (absarray[i] < PetscAbsScalar(blockarray[i*nbs+j])) absarray[i] = PetscAbsScalar(blockarray[i*nbs+j]); 1490 } 1491 } 1492 PetscFunctionReturn(0); 1493 } 1494 1495 /* 1496 This needs to be renamed and called by the regular MatILUFactor_SeqBAIJ when drop tolerance is used 1497 */ 1498 PetscErrorCode MatILUDTFactor_SeqBAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 1499 { 1500 Mat B = *fact; 1501 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*b; 1502 IS isicol; 1503 PetscErrorCode ierr; 1504 const PetscInt *r,*ic; 1505 PetscInt i,mbs=a->mbs,bs=A->rmap->bs,bs2=a->bs2,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 1506 PetscInt *bi,*bj,*bdiag; 1507 1508 PetscInt row,nzi,nzi_bl,nzi_bu,*im,dtcount,nzi_al,nzi_au; 1509 PetscInt nlnk,*lnk; 1510 PetscBT lnkbt; 1511 PetscBool row_identity,icol_identity; 1512 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,*multiplier,*vtmp; 1513 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 1514 1515 PetscReal dt=info->dt; /* shift=info->shiftamount; */ 1516 PetscInt nnz_max; 1517 PetscBool missing; 1518 PetscReal *vtmp_abs; 1519 MatScalar *v_work; 1520 PetscInt *v_pivots; 1521 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 1522 1523 PetscFunctionBegin; 1524 /* ------- symbolic factorization, can be reused ---------*/ 1525 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1526 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1527 adiag=a->diag; 1528 1529 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1530 1531 /* bdiag is location of diagonal in factor */ 1532 ierr = PetscMalloc1(mbs+1,&bdiag);CHKERRQ(ierr); 1533 1534 /* allocate row pointers bi */ 1535 ierr = PetscMalloc1(2*mbs+2,&bi);CHKERRQ(ierr); 1536 1537 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 1538 dtcount = (PetscInt)info->dtcount; 1539 if (dtcount > mbs-1) dtcount = mbs-1; 1540 nnz_max = ai[mbs]+2*mbs*dtcount +2; 1541 /* printf("MatILUDTFactor_SeqBAIJ, bs %d, ai[mbs] %d, nnz_max %d, dtcount %d\n",bs,ai[mbs],nnz_max,dtcount); */ 1542 ierr = PetscMalloc1(nnz_max,&bj);CHKERRQ(ierr); 1543 nnz_max = nnz_max*bs2; 1544 ierr = PetscMalloc1(nnz_max,&ba);CHKERRQ(ierr); 1545 1546 /* put together the new matrix */ 1547 ierr = MatSeqBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 1548 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 1549 1550 b = (Mat_SeqBAIJ*)(B)->data; 1551 b->free_a = PETSC_TRUE; 1552 b->free_ij = PETSC_TRUE; 1553 b->singlemalloc = PETSC_FALSE; 1554 1555 b->a = ba; 1556 b->j = bj; 1557 b->i = bi; 1558 b->diag = bdiag; 1559 b->ilen = 0; 1560 b->imax = 0; 1561 b->row = isrow; 1562 b->col = iscol; 1563 1564 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1565 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1566 1567 b->icol = isicol; 1568 ierr = PetscMalloc1(bs*(mbs+1),&b->solve_work);CHKERRQ(ierr); 1569 ierr = PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1570 b->maxnz = nnz_max/bs2; 1571 1572 (B)->factortype = MAT_FACTOR_ILUDT; 1573 (B)->info.factor_mallocs = 0; 1574 (B)->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)(ai[mbs]*bs2)); 1575 /* ------- end of symbolic factorization ---------*/ 1576 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1577 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1578 1579 /* linked list for storing column indices of the active row */ 1580 nlnk = mbs + 1; 1581 ierr = PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1582 1583 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 1584 ierr = PetscMalloc2(mbs,&im,mbs,&jtmp);CHKERRQ(ierr); 1585 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 1586 ierr = PetscMalloc2(mbs*bs2,&rtmp,mbs*bs2,&vtmp);CHKERRQ(ierr); 1587 ierr = PetscMalloc1(mbs+1,&vtmp_abs);CHKERRQ(ierr); 1588 ierr = PetscMalloc3(bs,&v_work,bs2,&multiplier,bs,&v_pivots);CHKERRQ(ierr); 1589 1590 allowzeropivot = PetscNot(A->erroriffailure); 1591 bi[0] = 0; 1592 bdiag[0] = (nnz_max/bs2)-1; /* location of diagonal in factor B */ 1593 bi[2*mbs+1] = bdiag[0]+1; /* endof bj and ba array */ 1594 for (i=0; i<mbs; i++) { 1595 /* copy initial fill into linked list */ 1596 nzi = ai[r[i]+1] - ai[r[i]]; 1597 if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1598 nzi_al = adiag[r[i]] - ai[r[i]]; 1599 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 1600 1601 /* load in initial unfactored row */ 1602 ajtmp = aj + ai[r[i]]; 1603 ierr = PetscLLAddPerm(nzi,ajtmp,ic,mbs,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1604 ierr = PetscMemzero(rtmp,mbs*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1605 aatmp = a->a + bs2*ai[r[i]]; 1606 for (j=0; j<nzi; j++) { 1607 ierr = PetscMemcpy(rtmp+bs2*ic[ajtmp[j]],aatmp+bs2*j,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1608 } 1609 1610 /* add pivot rows into linked list */ 1611 row = lnk[mbs]; 1612 while (row < i) { 1613 nzi_bl = bi[row+1] - bi[row] + 1; 1614 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 1615 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 1616 nzi += nlnk; 1617 row = lnk[row]; 1618 } 1619 1620 /* copy data from lnk into jtmp, then initialize lnk */ 1621 ierr = PetscLLClean(mbs,mbs,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 1622 1623 /* numerical factorization */ 1624 bjtmp = jtmp; 1625 row = *bjtmp++; /* 1st pivot row */ 1626 1627 while (row < i) { 1628 pc = rtmp + bs2*row; 1629 pv = ba + bs2*bdiag[row]; /* inv(diag) of the pivot row */ 1630 PetscKernel_A_gets_A_times_B(bs,pc,pv,multiplier); /* pc= multiplier = pc*inv(diag[row]) */ 1631 ierr = MatBlockAbs_private(1,bs2,pc,vtmp_abs);CHKERRQ(ierr); 1632 if (vtmp_abs[0] > dt) { /* apply tolerance dropping rule */ 1633 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 1634 pv = ba + bs2*(bdiag[row+1] + 1); 1635 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 1636 for (j=0; j<nz; j++) { 1637 PetscKernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); 1638 } 1639 /* ierr = PetscLogFlops(bslog*(nz+1.0)-bs);CHKERRQ(ierr); */ 1640 } 1641 row = *bjtmp++; 1642 } 1643 1644 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 1645 nzi_bl = 0; j = 0; 1646 while (jtmp[j] < i) { /* L-part. Note: jtmp is sorted */ 1647 ierr = PetscMemcpy(vtmp+bs2*j,rtmp+bs2*jtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 1648 nzi_bl++; j++; 1649 } 1650 nzi_bu = nzi - nzi_bl -1; 1651 1652 while (j < nzi) { /* U-part */ 1653 ierr = PetscMemcpy(vtmp+bs2*j,rtmp+bs2*jtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 1654 j++; 1655 } 1656 1657 ierr = MatBlockAbs_private(nzi,bs2,vtmp,vtmp_abs);CHKERRQ(ierr); 1658 1659 bjtmp = bj + bi[i]; 1660 batmp = ba + bs2*bi[i]; 1661 /* apply level dropping rule to L part */ 1662 ncut = nzi_al + dtcount; 1663 if (ncut < nzi_bl) { 1664 ierr = PetscSortSplitReal(ncut,nzi_bl,vtmp_abs,jtmp);CHKERRQ(ierr); 1665 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 1666 } else { 1667 ncut = nzi_bl; 1668 } 1669 for (j=0; j<ncut; j++) { 1670 bjtmp[j] = jtmp[j]; 1671 ierr = PetscMemcpy(batmp+bs2*j,rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 1672 } 1673 bi[i+1] = bi[i] + ncut; 1674 nzi = ncut + 1; 1675 1676 /* apply level dropping rule to U part */ 1677 ncut = nzi_au + dtcount; 1678 if (ncut < nzi_bu) { 1679 ierr = PetscSortSplitReal(ncut,nzi_bu,vtmp_abs+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 1680 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 1681 } else { 1682 ncut = nzi_bu; 1683 } 1684 nzi += ncut; 1685 1686 /* mark bdiagonal */ 1687 bdiag[i+1] = bdiag[i] - (ncut + 1); 1688 bi[2*mbs - i] = bi[2*mbs - i +1] - (ncut + 1); 1689 1690 bjtmp = bj + bdiag[i]; 1691 batmp = ba + bs2*bdiag[i]; 1692 ierr = PetscMemcpy(batmp,rtmp+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1693 *bjtmp = i; 1694 1695 bjtmp = bj + bdiag[i+1]+1; 1696 batmp = ba + (bdiag[i+1]+1)*bs2; 1697 1698 for (k=0; k<ncut; k++) { 1699 bjtmp[k] = jtmp[nzi_bl+1+k]; 1700 ierr = PetscMemcpy(batmp+bs2*k,rtmp+bs2*bjtmp[k],bs2*sizeof(MatScalar));CHKERRQ(ierr); 1701 } 1702 1703 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 1704 1705 /* invert diagonal block for simplier triangular solves - add shift??? */ 1706 batmp = ba + bs2*bdiag[i]; 1707 1708 ierr = PetscKernel_A_gets_inverse_A(bs,batmp,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 1709 if (zeropivotdetected) B->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1710 } /* for (i=0; i<mbs; i++) */ 1711 ierr = PetscFree3(v_work,multiplier,v_pivots);CHKERRQ(ierr); 1712 1713 /* printf("end of L %d, beginning of U %d\n",bi[mbs],bdiag[mbs]); */ 1714 if (bi[mbs] >= bdiag[mbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[mbs],bdiag[mbs]); 1715 1716 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1717 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1718 1719 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1720 1721 ierr = PetscFree2(im,jtmp);CHKERRQ(ierr); 1722 ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr); 1723 1724 ierr = PetscLogFlops(bs2*B->cmap->n);CHKERRQ(ierr); 1725 b->maxnz = b->nz = bi[mbs] + bdiag[0] - bdiag[mbs]; 1726 1727 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1728 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 1729 if (row_identity && icol_identity) { 1730 B->ops->solve = MatSolve_SeqBAIJ_N_NaturalOrdering; 1731 } else { 1732 B->ops->solve = MatSolve_SeqBAIJ_N; 1733 } 1734 1735 B->ops->solveadd = 0; 1736 B->ops->solvetranspose = 0; 1737 B->ops->solvetransposeadd = 0; 1738 B->ops->matsolve = 0; 1739 B->assembled = PETSC_TRUE; 1740 B->preallocated = PETSC_TRUE; 1741 PetscFunctionReturn(0); 1742 } 1743