1 2 #include "src/mat/impls/baij/seq/baij.h" 3 #include "src/mat/impls/sbaij/seq/sbaij.h" 4 #include "src/inline/ilu.h" 5 #include "include/petscis.h" 6 7 #if !defined(PETSC_USE_COMPLEX) 8 /* 9 input: 10 F -- numeric factor 11 output: 12 nneg, nzero, npos: matrix inertia 13 */ 14 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatGetInertia_SeqSBAIJ" 17 PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F,int *nneig,int *nzero,int *npos) 18 { 19 Mat_SeqSBAIJ *fact_ptr = (Mat_SeqSBAIJ*)F->data; 20 PetscScalar *dd = fact_ptr->a; 21 int mbs=fact_ptr->mbs,bs=fact_ptr->bs,i,nneig_tmp,npos_tmp, 22 *fi = fact_ptr->i; 23 24 PetscFunctionBegin; 25 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"No support for bs: %d >1 yet",bs); 26 nneig_tmp = 0; npos_tmp = 0; 27 for (i=0; i<mbs; i++){ 28 if (PetscRealPart(dd[*fi]) > 0.0){ 29 npos_tmp++; 30 } else if (PetscRealPart(dd[*fi]) < 0.0){ 31 nneig_tmp++; 32 } 33 fi++; 34 } 35 if (nneig) *nneig = nneig_tmp; 36 if (npos) *npos = npos_tmp; 37 if (nzero) *nzero = mbs - nneig_tmp - npos_tmp; 38 39 PetscFunctionReturn(0); 40 } 41 #endif /* !defined(PETSC_USE_COMPLEX) */ 42 43 /* Using Modified Sparse Row (MSR) storage. 44 See page 85, "Iterative Methods ..." by Saad. */ 45 /* 46 Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP. 47 */ 48 /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */ 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqSBAIJ" 51 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *B) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b; 54 PetscErrorCode ierr; 55 int *rip,i,mbs = a->mbs,*ai,*aj; 56 int *jutmp,bs = a->bs,bs2=a->bs2; 57 int m,realloc = 0,prow; 58 int *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd; 59 int *il,ili,nextprow; 60 PetscReal f = info->fill; 61 PetscTruth perm_identity; 62 63 PetscFunctionBegin; 64 /* check whether perm is the identity mapping */ 65 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 66 67 /* -- inplace factorization, i.e., use sbaij for *B -- */ 68 if (perm_identity && bs==1 ){ 69 if (!perm_identity) a->permute = PETSC_TRUE; 70 71 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 72 73 if (perm_identity){ /* without permutation */ 74 ai = a->i; aj = a->j; 75 } else { /* non-trivial permutation */ 76 ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 77 ai = a->inew; aj = a->jnew; 78 } 79 80 /* initialization */ 81 ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr); 82 umax = (int)(f*ai[mbs] + 1); 83 ierr = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr); 84 iu[0] = 0; 85 juidx = 0; /* index for ju */ 86 ierr = PetscMalloc((3*mbs+1)*sizeof(int),&jl);CHKERRQ(ierr); /* linked list for getting pivot row */ 87 q = jl + mbs; /* linked list for col index of active row */ 88 il = q + mbs; 89 for (i=0; i<mbs; i++){ 90 jl[i] = mbs; 91 q[i] = 0; 92 il[i] = 0; 93 } 94 95 /* for each row k */ 96 for (k=0; k<mbs; k++){ 97 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 98 q[k] = mbs; 99 /* initialize nonzero structure of k-th row to row rip[k] of A */ 100 jmin = ai[rip[k]] +1; /* exclude diag[k] */ 101 jmax = ai[rip[k]+1]; 102 for (j=jmin; j<jmax; j++){ 103 vj = rip[aj[j]]; /* col. value */ 104 if(vj > k){ 105 qm = k; 106 do { 107 m = qm; qm = q[m]; 108 } while(qm < vj); 109 if (qm == vj) { 110 SETERRQ(1," error: duplicate entry in A\n"); 111 } 112 nzk++; 113 q[m] = vj; 114 q[vj] = qm; 115 } /* if(vj > k) */ 116 } /* for (j=jmin; j<jmax; j++) */ 117 118 /* modify nonzero structure of k-th row by computing fill-in 119 for each row i to be merged in */ 120 prow = k; 121 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 122 123 while (prow < k){ 124 nextprow = jl[prow]; 125 126 /* merge row prow into k-th row */ 127 ili = il[prow]; 128 jmin = ili + 1; /* points to 2nd nzero entry in U(prow,k:mbs-1) */ 129 jmax = iu[prow+1]; 130 qm = k; 131 for (j=jmin; j<jmax; j++){ 132 vj = ju[j]; 133 do { 134 m = qm; qm = q[m]; 135 } while (qm < vj); 136 if (qm != vj){ /* a fill */ 137 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 138 } 139 } /* end of for (j=jmin; j<jmax; j++) */ 140 if (jmin < jmax){ 141 il[prow] = jmin; 142 j = ju[jmin]; 143 jl[prow] = jl[j]; jl[j] = prow; /* update jl */ 144 } 145 prow = nextprow; 146 } 147 148 /* update il and jl */ 149 if (nzk > 0){ 150 i = q[k]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 151 jl[k] = jl[i]; jl[i] = k; 152 il[k] = iu[k] + 1; 153 } 154 iu[k+1] = iu[k] + nzk + 1; /* include diag[k] */ 155 156 /* allocate more space to ju if needed */ 157 if (iu[k+1] > umax) { 158 /* estimate how much additional space we will need */ 159 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 160 /* just double the memory each time */ 161 maxadd = umax; 162 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 163 umax += maxadd; 164 165 /* allocate a longer ju */ 166 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 167 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr); 168 ierr = PetscFree(ju);CHKERRQ(ierr); 169 ju = jutmp; 170 realloc++; /* count how many times we realloc */ 171 } 172 173 /* save nonzero structure of k-th row in ju */ 174 ju[juidx++] = k; /* diag[k] */ 175 i = k; 176 while (nzk --) { 177 i = q[i]; 178 ju[juidx++] = i; 179 } 180 } 181 182 if (ai[mbs] != 0) { 183 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 184 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 185 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 186 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af); 187 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"); 188 } else { 189 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 190 } 191 192 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 193 /* ierr = PetscFree(q);CHKERRQ(ierr); */ 194 ierr = PetscFree(jl);CHKERRQ(ierr); 195 196 /* put together the new matrix */ 197 ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr); 198 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 199 ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr); 200 201 /* PetscLogObjectParent(*B,iperm); */ 202 b = (Mat_SeqSBAIJ*)(*B)->data; 203 ierr = PetscFree(b->imax);CHKERRQ(ierr); 204 b->singlemalloc = PETSC_FALSE; 205 /* the next line frees the default space generated by the Create() */ 206 ierr = PetscFree(b->a);CHKERRQ(ierr); 207 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 208 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 209 b->j = ju; 210 b->i = iu; 211 b->diag = 0; 212 b->ilen = 0; 213 b->imax = 0; 214 b->row = perm; 215 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 216 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 217 b->icol = perm; 218 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 219 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 220 /* In b structure: Free imax, ilen, old a, old j. 221 Allocate idnew, solve_work, new a, new j */ 222 PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar))); 223 b->maxnz = b->nz = iu[mbs]; 224 225 (*B)->factor = FACTOR_CHOLESKY; 226 (*B)->info.factor_mallocs = realloc; 227 (*B)->info.fill_ratio_given = f; 228 if (ai[mbs] != 0) { 229 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 230 } else { 231 (*B)->info.fill_ratio_needed = 0.0; 232 } 233 234 235 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 236 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 237 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 238 239 PetscFunctionReturn(0); 240 } 241 /* ----------- end of new code --------------------*/ 242 243 244 if (!perm_identity) a->permute = PETSC_TRUE; 245 246 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 247 248 if (perm_identity){ /* without permutation */ 249 ai = a->i; aj = a->j; 250 } else { /* non-trivial permutation */ 251 ierr = MatReorderingSeqSBAIJ(A,perm);CHKERRQ(ierr); 252 ai = a->inew; aj = a->jnew; 253 } 254 255 /* initialization */ 256 ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);CHKERRQ(ierr); 257 umax = (int)(f*ai[mbs] + 1); umax += mbs + 1; 258 ierr = PetscMalloc(umax*sizeof(int),&ju);CHKERRQ(ierr); 259 iu[0] = mbs+1; 260 juidx = mbs + 1; /* index for ju */ 261 ierr = PetscMalloc(2*mbs*sizeof(int),&jl);CHKERRQ(ierr); /* linked list for pivot row */ 262 q = jl + mbs; /* linked list for col index */ 263 for (i=0; i<mbs; i++){ 264 jl[i] = mbs; 265 q[i] = 0; 266 } 267 268 /* for each row k */ 269 for (k=0; k<mbs; k++){ 270 for (i=0; i<mbs; i++) q[i] = 0; /* to be removed! */ 271 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 272 q[k] = mbs; 273 /* initialize nonzero structure of k-th row to row rip[k] of A */ 274 jmin = ai[rip[k]] +1; /* exclude diag[k] */ 275 jmax = ai[rip[k]+1]; 276 for (j=jmin; j<jmax; j++){ 277 vj = rip[aj[j]]; /* col. value */ 278 if(vj > k){ 279 qm = k; 280 do { 281 m = qm; qm = q[m]; 282 } while(qm < vj); 283 if (qm == vj) { 284 SETERRQ(1," error: duplicate entry in A\n"); 285 } 286 nzk++; 287 q[m] = vj; 288 q[vj] = qm; 289 } /* if(vj > k) */ 290 } /* for (j=jmin; j<jmax; j++) */ 291 292 /* modify nonzero structure of k-th row by computing fill-in 293 for each row i to be merged in */ 294 prow = k; 295 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 296 297 while (prow < k){ 298 /* merge row prow into k-th row */ 299 jmin = iu[prow] + 1; jmax = iu[prow+1]; 300 qm = k; 301 for (j=jmin; j<jmax; j++){ 302 vj = ju[j]; 303 do { 304 m = qm; qm = q[m]; 305 } while (qm < vj); 306 if (qm != vj){ 307 nzk++; q[m] = vj; q[vj] = qm; qm = vj; 308 } 309 } 310 prow = jl[prow]; /* next pivot row */ 311 } 312 313 /* add k to row list for first nonzero element in k-th row */ 314 if (nzk > 0){ 315 i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */ 316 jl[k] = jl[i]; jl[i] = k; 317 } 318 iu[k+1] = iu[k] + nzk; 319 320 /* allocate more space to ju if needed */ 321 if (iu[k+1] > umax) { 322 /* estimate how much additional space we will need */ 323 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 324 /* just double the memory each time */ 325 maxadd = umax; 326 if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2; 327 umax += maxadd; 328 329 /* allocate a longer ju */ 330 ierr = PetscMalloc(umax*sizeof(int),&jutmp);CHKERRQ(ierr); 331 ierr = PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));CHKERRQ(ierr); 332 ierr = PetscFree(ju);CHKERRQ(ierr); 333 ju = jutmp; 334 realloc++; /* count how many times we realloc */ 335 } 336 337 /* save nonzero structure of k-th row in ju */ 338 i=k; 339 while (nzk --) { 340 i = q[i]; 341 ju[juidx++] = i; 342 } 343 } 344 345 if (ai[mbs] != 0) { 346 PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 347 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af); 348 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af); 349 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af); 350 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n"); 351 } else { 352 PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n"); 353 } 354 355 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 356 /* ierr = PetscFree(q);CHKERRQ(ierr); */ 357 ierr = PetscFree(jl);CHKERRQ(ierr); 358 359 /* put together the new matrix */ 360 ierr = MatCreate(A->comm,bs*mbs,bs*mbs,bs*mbs,bs*mbs,B);CHKERRQ(ierr); 361 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 362 ierr = MatSeqSBAIJSetPreallocation(*B,bs,0,PETSC_NULL);CHKERRQ(ierr); 363 364 /* PetscLogObjectParent(*B,iperm); */ 365 b = (Mat_SeqSBAIJ*)(*B)->data; 366 ierr = PetscFree(b->imax);CHKERRQ(ierr); 367 b->singlemalloc = PETSC_FALSE; 368 /* the next line frees the default space generated by the Create() */ 369 ierr = PetscFree(b->a);CHKERRQ(ierr); 370 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 371 ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 372 b->j = ju; 373 b->i = iu; 374 b->diag = 0; 375 b->ilen = 0; 376 b->imax = 0; 377 b->row = perm; 378 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 379 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 380 b->icol = perm; 381 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 382 ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 383 /* In b structure: Free imax, ilen, old a, old j. 384 Allocate idnew, solve_work, new a, new j */ 385 PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar))); 386 b->maxnz = b->nz = iu[mbs]; 387 388 (*B)->factor = FACTOR_CHOLESKY; 389 (*B)->info.factor_mallocs = realloc; 390 (*B)->info.fill_ratio_given = f; 391 if (ai[mbs] != 0) { 392 (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]); 393 } else { 394 (*B)->info.fill_ratio_needed = 0.0; 395 } 396 397 if (perm_identity){ 398 switch (bs) { 399 case 1: 400 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 401 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 402 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 403 break; 404 case 2: 405 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 406 (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 407 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 408 break; 409 case 3: 410 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 411 (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 412 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n"); 413 break; 414 case 4: 415 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 416 (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 417 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 418 break; 419 case 5: 420 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 421 (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 422 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 423 break; 424 case 6: 425 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 426 (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 427 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 428 break; 429 case 7: 430 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 431 (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 432 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 433 break; 434 default: 435 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 436 (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 437 PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n"); 438 break; 439 } 440 } 441 442 PetscFunctionReturn(0); 443 } 444 445 446 #undef __FUNCT__ 447 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N" 448 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B) 449 { 450 Mat C = *B; 451 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 452 IS perm = b->row; 453 PetscErrorCode ierr; 454 int *perm_ptr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 455 int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 456 int bs=a->bs,bs2 = a->bs2; 457 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 458 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 459 MatScalar *work; 460 int *pivots; 461 462 PetscFunctionBegin; 463 464 /* initialization */ 465 ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 466 ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 467 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 468 jl = il + mbs; 469 for (i=0; i<mbs; i++) { 470 jl[i] = mbs; il[0] = 0; 471 } 472 ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 473 uik = dk + bs2; 474 work = uik + bs2; 475 ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr); 476 477 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 478 479 /* check permutation */ 480 if (!a->permute){ 481 ai = a->i; aj = a->j; aa = a->a; 482 } else { 483 ai = a->inew; aj = a->jnew; 484 ierr = PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 485 ierr = PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 486 ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr); 487 ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 488 489 for (i=0; i<mbs; i++){ 490 jmin = ai[i]; jmax = ai[i+1]; 491 for (j=jmin; j<jmax; j++){ 492 while (a2anew[j] != j){ 493 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 494 for (k1=0; k1<bs2; k1++){ 495 dk[k1] = aa[k*bs2+k1]; 496 aa[k*bs2+k1] = aa[j*bs2+k1]; 497 aa[j*bs2+k1] = dk[k1]; 498 } 499 } 500 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 501 if (i > aj[j]){ 502 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 503 ap = aa + j*bs2; /* ptr to the beginning of j-th block of aa */ 504 for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 505 for (k=0; k<bs; k++){ /* j-th block of aa <- dk^T */ 506 for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1]; 507 } 508 } 509 } 510 } 511 ierr = PetscFree(a2anew);CHKERRQ(ierr); 512 } 513 514 /* for each row k */ 515 for (k = 0; k<mbs; k++){ 516 517 /*initialize k-th row with elements nonzero in row perm(k) of A */ 518 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 519 520 ap = aa + jmin*bs2; 521 for (j = jmin; j < jmax; j++){ 522 vj = perm_ptr[aj[j]]; /* block col. index */ 523 rtmp_ptr = rtmp + vj*bs2; 524 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 525 } 526 527 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 528 ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 529 i = jl[k]; /* first row to be added to k_th row */ 530 531 while (i < k){ 532 nexti = jl[i]; /* next row to be added to k_th row */ 533 534 /* compute multiplier */ 535 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 536 537 /* uik = -inv(Di)*U_bar(i,k) */ 538 diag = ba + i*bs2; 539 u = ba + ili*bs2; 540 ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 541 Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 542 543 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 544 Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 545 546 /* update -U(i,k) */ 547 ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 548 549 /* add multiple of row i to k-th row ... */ 550 jmin = ili + 1; jmax = bi[i+1]; 551 if (jmin < jmax){ 552 for (j=jmin; j<jmax; j++) { 553 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 554 rtmp_ptr = rtmp + bj[j]*bs2; 555 u = ba + j*bs2; 556 Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 557 } 558 559 /* ... add i to row list for next nonzero entry */ 560 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 561 j = bj[jmin]; 562 jl[i] = jl[j]; jl[j] = i; /* update jl */ 563 } 564 i = nexti; 565 } 566 567 /* save nonzero entries in k-th row of U ... */ 568 569 /* invert diagonal block */ 570 diag = ba+k*bs2; 571 ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 572 Kernel_A_gets_inverse_A(bs,diag,pivots,work); 573 574 jmin = bi[k]; jmax = bi[k+1]; 575 if (jmin < jmax) { 576 for (j=jmin; j<jmax; j++){ 577 vj = bj[j]; /* block col. index of U */ 578 u = ba + j*bs2; 579 rtmp_ptr = rtmp + vj*bs2; 580 for (k1=0; k1<bs2; k1++){ 581 *u++ = *rtmp_ptr; 582 *rtmp_ptr++ = 0.0; 583 } 584 } 585 586 /* ... add k to row list for first nonzero entry in k-th row */ 587 il[k] = jmin; 588 i = bj[jmin]; 589 jl[k] = jl[i]; jl[i] = k; 590 } 591 } 592 593 ierr = PetscFree(rtmp);CHKERRQ(ierr); 594 ierr = PetscFree(il);CHKERRQ(ierr); 595 ierr = PetscFree(dk);CHKERRQ(ierr); 596 ierr = PetscFree(pivots);CHKERRQ(ierr); 597 if (a->permute){ 598 ierr = PetscFree(aa);CHKERRQ(ierr); 599 } 600 601 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 602 C->factor = FACTOR_CHOLESKY; 603 C->assembled = PETSC_TRUE; 604 C->preallocated = PETSC_TRUE; 605 PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 606 PetscFunctionReturn(0); 607 } 608 609 #undef __FUNCT__ 610 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering" 611 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B) 612 { 613 Mat C = *B; 614 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 615 PetscErrorCode ierr; 616 int i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 617 int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 618 int bs=a->bs,bs2 = a->bs2; 619 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 620 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 621 MatScalar *work; 622 int *pivots; 623 624 PetscFunctionBegin; 625 626 /* initialization */ 627 628 ierr = PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 629 ierr = PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));CHKERRQ(ierr); 630 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 631 jl = il + mbs; 632 for (i=0; i<mbs; i++) { 633 jl[i] = mbs; il[0] = 0; 634 } 635 ierr = PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);CHKERRQ(ierr); 636 uik = dk + bs2; 637 work = uik + bs2; 638 ierr = PetscMalloc(bs*sizeof(int),&pivots);CHKERRQ(ierr); 639 640 ai = a->i; aj = a->j; aa = a->a; 641 642 /* for each row k */ 643 for (k = 0; k<mbs; k++){ 644 645 /*initialize k-th row with elements nonzero in row k of A */ 646 jmin = ai[k]; jmax = ai[k+1]; 647 ap = aa + jmin*bs2; 648 for (j = jmin; j < jmax; j++){ 649 vj = aj[j]; /* block col. index */ 650 rtmp_ptr = rtmp + vj*bs2; 651 for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++; 652 } 653 654 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 655 ierr = PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 656 i = jl[k]; /* first row to be added to k_th row */ 657 658 while (i < k){ 659 nexti = jl[i]; /* next row to be added to k_th row */ 660 661 /* compute multiplier */ 662 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 663 664 /* uik = -inv(Di)*U_bar(i,k) */ 665 diag = ba + i*bs2; 666 u = ba + ili*bs2; 667 ierr = PetscMemzero(uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 668 Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u); 669 670 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 671 Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u); 672 673 /* update -U(i,k) */ 674 ierr = PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));CHKERRQ(ierr); 675 676 /* add multiple of row i to k-th row ... */ 677 jmin = ili + 1; jmax = bi[i+1]; 678 if (jmin < jmax){ 679 for (j=jmin; j<jmax; j++) { 680 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 681 rtmp_ptr = rtmp + bj[j]*bs2; 682 u = ba + j*bs2; 683 Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u); 684 } 685 686 /* ... add i to row list for next nonzero entry */ 687 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 688 j = bj[jmin]; 689 jl[i] = jl[j]; jl[j] = i; /* update jl */ 690 } 691 i = nexti; 692 } 693 694 /* save nonzero entries in k-th row of U ... */ 695 696 /* invert diagonal block */ 697 diag = ba+k*bs2; 698 ierr = PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));CHKERRQ(ierr); 699 Kernel_A_gets_inverse_A(bs,diag,pivots,work); 700 701 jmin = bi[k]; jmax = bi[k+1]; 702 if (jmin < jmax) { 703 for (j=jmin; j<jmax; j++){ 704 vj = bj[j]; /* block col. index of U */ 705 u = ba + j*bs2; 706 rtmp_ptr = rtmp + vj*bs2; 707 for (k1=0; k1<bs2; k1++){ 708 *u++ = *rtmp_ptr; 709 *rtmp_ptr++ = 0.0; 710 } 711 } 712 713 /* ... add k to row list for first nonzero entry in k-th row */ 714 il[k] = jmin; 715 i = bj[jmin]; 716 jl[k] = jl[i]; jl[i] = k; 717 } 718 } 719 720 ierr = PetscFree(rtmp);CHKERRQ(ierr); 721 ierr = PetscFree(il);CHKERRQ(ierr); 722 ierr = PetscFree(dk);CHKERRQ(ierr); 723 ierr = PetscFree(pivots);CHKERRQ(ierr); 724 725 C->factor = FACTOR_CHOLESKY; 726 C->assembled = PETSC_TRUE; 727 C->preallocated = PETSC_TRUE; 728 PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 729 PetscFunctionReturn(0); 730 } 731 732 /* 733 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 734 Version for blocks 2 by 2. 735 */ 736 #undef __FUNCT__ 737 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2" 738 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B) 739 { 740 Mat C = *B; 741 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 742 IS perm = b->row; 743 PetscErrorCode ierr; 744 int *perm_ptr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 745 int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 746 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 747 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 748 749 PetscFunctionBegin; 750 751 /* initialization */ 752 /* il and jl record the first nonzero element in each row of the accessing 753 window U(0:k, k:mbs-1). 754 jl: list of rows to be added to uneliminated rows 755 i>= k: jl(i) is the first row to be added to row i 756 i< k: jl(i) is the row following row i in some list of rows 757 jl(i) = mbs indicates the end of a list 758 il(i): points to the first nonzero element in columns k,...,mbs-1 of 759 row i of U */ 760 ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 761 ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 762 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 763 jl = il + mbs; 764 for (i=0; i<mbs; i++) { 765 jl[i] = mbs; il[0] = 0; 766 } 767 ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 768 uik = dk + 4; 769 ierr = ISGetIndices(perm,&perm_ptr);CHKERRQ(ierr); 770 771 /* check permutation */ 772 if (!a->permute){ 773 ai = a->i; aj = a->j; aa = a->a; 774 } else { 775 ai = a->inew; aj = a->jnew; 776 ierr = PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 777 ierr = PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 778 ierr = PetscMalloc(ai[mbs]*sizeof(int),&a2anew);CHKERRQ(ierr); 779 ierr = PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 780 781 for (i=0; i<mbs; i++){ 782 jmin = ai[i]; jmax = ai[i+1]; 783 for (j=jmin; j<jmax; j++){ 784 while (a2anew[j] != j){ 785 k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k; 786 for (k1=0; k1<4; k1++){ 787 dk[k1] = aa[k*4+k1]; 788 aa[k*4+k1] = aa[j*4+k1]; 789 aa[j*4+k1] = dk[k1]; 790 } 791 } 792 /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */ 793 if (i > aj[j]){ 794 /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */ 795 ap = aa + j*4; /* ptr to the beginning of the block */ 796 dk[1] = ap[1]; /* swap ap[1] and ap[2] */ 797 ap[1] = ap[2]; 798 ap[2] = dk[1]; 799 } 800 } 801 } 802 ierr = PetscFree(a2anew);CHKERRQ(ierr); 803 } 804 805 /* for each row k */ 806 for (k = 0; k<mbs; k++){ 807 808 /*initialize k-th row with elements nonzero in row perm(k) of A */ 809 jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1]; 810 ap = aa + jmin*4; 811 for (j = jmin; j < jmax; j++){ 812 vj = perm_ptr[aj[j]]; /* block col. index */ 813 rtmp_ptr = rtmp + vj*4; 814 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 815 } 816 817 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 818 ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 819 i = jl[k]; /* first row to be added to k_th row */ 820 821 while (i < k){ 822 nexti = jl[i]; /* next row to be added to k_th row */ 823 824 /* compute multiplier */ 825 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 826 827 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 828 diag = ba + i*4; 829 u = ba + ili*4; 830 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 831 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 832 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 833 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 834 835 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 836 dk[0] += uik[0]*u[0] + uik[1]*u[1]; 837 dk[1] += uik[2]*u[0] + uik[3]*u[1]; 838 dk[2] += uik[0]*u[2] + uik[1]*u[3]; 839 dk[3] += uik[2]*u[2] + uik[3]*u[3]; 840 841 /* update -U(i,k): ba[ili] = uik */ 842 ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 843 844 /* add multiple of row i to k-th row ... */ 845 jmin = ili + 1; jmax = bi[i+1]; 846 if (jmin < jmax){ 847 for (j=jmin; j<jmax; j++) { 848 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 849 rtmp_ptr = rtmp + bj[j]*4; 850 u = ba + j*4; 851 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 852 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 853 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 854 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 855 } 856 857 /* ... add i to row list for next nonzero entry */ 858 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 859 j = bj[jmin]; 860 jl[i] = jl[j]; jl[j] = i; /* update jl */ 861 } 862 i = nexti; 863 } 864 865 /* save nonzero entries in k-th row of U ... */ 866 867 /* invert diagonal block */ 868 diag = ba+k*4; 869 ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 870 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 871 872 jmin = bi[k]; jmax = bi[k+1]; 873 if (jmin < jmax) { 874 for (j=jmin; j<jmax; j++){ 875 vj = bj[j]; /* block col. index of U */ 876 u = ba + j*4; 877 rtmp_ptr = rtmp + vj*4; 878 for (k1=0; k1<4; k1++){ 879 *u++ = *rtmp_ptr; 880 *rtmp_ptr++ = 0.0; 881 } 882 } 883 884 /* ... add k to row list for first nonzero entry in k-th row */ 885 il[k] = jmin; 886 i = bj[jmin]; 887 jl[k] = jl[i]; jl[i] = k; 888 } 889 } 890 891 ierr = PetscFree(rtmp);CHKERRQ(ierr); 892 ierr = PetscFree(il);CHKERRQ(ierr); 893 ierr = PetscFree(dk);CHKERRQ(ierr); 894 if (a->permute) { 895 ierr = PetscFree(aa);CHKERRQ(ierr); 896 } 897 ierr = ISRestoreIndices(perm,&perm_ptr);CHKERRQ(ierr); 898 C->factor = FACTOR_CHOLESKY; 899 C->assembled = PETSC_TRUE; 900 C->preallocated = PETSC_TRUE; 901 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 902 PetscFunctionReturn(0); 903 } 904 905 /* 906 Version for when blocks are 2 by 2 Using natural ordering 907 */ 908 #undef __FUNCT__ 909 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering" 910 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B) 911 { 912 Mat C = *B; 913 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 914 PetscErrorCode ierr; 915 int i,j,mbs=a->mbs,*bi=b->i,*bj=b->j; 916 int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili; 917 MatScalar *ba = b->a,*aa,*ap,*dk,*uik; 918 MatScalar *u,*diag,*rtmp,*rtmp_ptr; 919 920 PetscFunctionBegin; 921 922 /* initialization */ 923 /* il and jl record the first nonzero element in each row of the accessing 924 window U(0:k, k:mbs-1). 925 jl: list of rows to be added to uneliminated rows 926 i>= k: jl(i) is the first row to be added to row i 927 i< k: jl(i) is the row following row i in some list of rows 928 jl(i) = mbs indicates the end of a list 929 il(i): points to the first nonzero element in columns k,...,mbs-1 of 930 row i of U */ 931 ierr = PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 932 ierr = PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));CHKERRQ(ierr); 933 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 934 jl = il + mbs; 935 for (i=0; i<mbs; i++) { 936 jl[i] = mbs; il[0] = 0; 937 } 938 ierr = PetscMalloc(8*sizeof(MatScalar),&dk);CHKERRQ(ierr); 939 uik = dk + 4; 940 941 ai = a->i; aj = a->j; aa = a->a; 942 943 /* for each row k */ 944 for (k = 0; k<mbs; k++){ 945 946 /*initialize k-th row with elements nonzero in row k of A */ 947 jmin = ai[k]; jmax = ai[k+1]; 948 ap = aa + jmin*4; 949 for (j = jmin; j < jmax; j++){ 950 vj = aj[j]; /* block col. index */ 951 rtmp_ptr = rtmp + vj*4; 952 for (i=0; i<4; i++) *rtmp_ptr++ = *ap++; 953 } 954 955 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 956 ierr = PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));CHKERRQ(ierr); 957 i = jl[k]; /* first row to be added to k_th row */ 958 959 while (i < k){ 960 nexti = jl[i]; /* next row to be added to k_th row */ 961 962 /* compute multiplier */ 963 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 964 965 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 966 diag = ba + i*4; 967 u = ba + ili*4; 968 uik[0] = -(diag[0]*u[0] + diag[2]*u[1]); 969 uik[1] = -(diag[1]*u[0] + diag[3]*u[1]); 970 uik[2] = -(diag[0]*u[2] + diag[2]*u[3]); 971 uik[3] = -(diag[1]*u[2] + diag[3]*u[3]); 972 973 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 974 dk[0] += uik[0]*u[0] + uik[1]*u[1]; 975 dk[1] += uik[2]*u[0] + uik[3]*u[1]; 976 dk[2] += uik[0]*u[2] + uik[1]*u[3]; 977 dk[3] += uik[2]*u[2] + uik[3]*u[3]; 978 979 /* update -U(i,k): ba[ili] = uik */ 980 ierr = PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));CHKERRQ(ierr); 981 982 /* add multiple of row i to k-th row ... */ 983 jmin = ili + 1; jmax = bi[i+1]; 984 if (jmin < jmax){ 985 for (j=jmin; j<jmax; j++) { 986 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 987 rtmp_ptr = rtmp + bj[j]*4; 988 u = ba + j*4; 989 rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1]; 990 rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1]; 991 rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3]; 992 rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3]; 993 } 994 995 /* ... add i to row list for next nonzero entry */ 996 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 997 j = bj[jmin]; 998 jl[i] = jl[j]; jl[j] = i; /* update jl */ 999 } 1000 i = nexti; 1001 } 1002 1003 /* save nonzero entries in k-th row of U ... */ 1004 1005 /* invert diagonal block */ 1006 diag = ba+k*4; 1007 ierr = PetscMemcpy(diag,dk,4*sizeof(MatScalar));CHKERRQ(ierr); 1008 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 1009 1010 jmin = bi[k]; jmax = bi[k+1]; 1011 if (jmin < jmax) { 1012 for (j=jmin; j<jmax; j++){ 1013 vj = bj[j]; /* block col. index of U */ 1014 u = ba + j*4; 1015 rtmp_ptr = rtmp + vj*4; 1016 for (k1=0; k1<4; k1++){ 1017 *u++ = *rtmp_ptr; 1018 *rtmp_ptr++ = 0.0; 1019 } 1020 } 1021 1022 /* ... add k to row list for first nonzero entry in k-th row */ 1023 il[k] = jmin; 1024 i = bj[jmin]; 1025 jl[k] = jl[i]; jl[i] = k; 1026 } 1027 } 1028 1029 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1030 ierr = PetscFree(il);CHKERRQ(ierr); 1031 ierr = PetscFree(dk);CHKERRQ(ierr); 1032 1033 C->factor = FACTOR_CHOLESKY; 1034 C->assembled = PETSC_TRUE; 1035 C->preallocated = PETSC_TRUE; 1036 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 1037 PetscFunctionReturn(0); 1038 } 1039 1040 /* 1041 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 1042 Version for blocks are 1 by 1. 1043 */ 1044 #undef __FUNCT__ 1045 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1" 1046 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B) 1047 { 1048 Mat C = *B; 1049 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data; 1050 IS ip = b->row; 1051 PetscErrorCode ierr; 1052 int *rip,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j; 1053 int *ai,*aj,*r; 1054 int k,jmin,jmax,*jl,*il,vj,nexti,ili; 1055 MatScalar *rtmp; 1056 MatScalar *ba = b->a,*aa,ak; 1057 MatScalar dk,uikdi; 1058 1059 PetscFunctionBegin; 1060 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1061 if (!a->permute){ 1062 ai = a->i; aj = a->j; aa = a->a; 1063 } else { 1064 ai = a->inew; aj = a->jnew; 1065 ierr = PetscMalloc(ai[mbs]*sizeof(MatScalar),&aa);CHKERRQ(ierr); 1066 ierr = PetscMemcpy(aa,a->a,ai[mbs]*sizeof(MatScalar));CHKERRQ(ierr); 1067 ierr = PetscMalloc(ai[mbs]*sizeof(int),&r);CHKERRQ(ierr); 1068 ierr= PetscMemcpy(r,a->a2anew,(ai[mbs])*sizeof(int));CHKERRQ(ierr); 1069 1070 jmin = ai[0]; jmax = ai[mbs]; 1071 for (j=jmin; j<jmax; j++){ 1072 while (r[j] != j){ 1073 k = r[j]; r[j] = r[k]; r[k] = k; 1074 ak = aa[k]; aa[k] = aa[j]; aa[j] = ak; 1075 } 1076 } 1077 ierr = PetscFree(r);CHKERRQ(ierr); 1078 } 1079 1080 /* initialization */ 1081 /* il and jl record the first nonzero element in each row of the accessing 1082 window U(0:k, k:mbs-1). 1083 jl: list of rows to be added to uneliminated rows 1084 i>= k: jl(i) is the first row to be added to row i 1085 i< k: jl(i) is the row following row i in some list of rows 1086 jl(i) = mbs indicates the end of a list 1087 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1088 row i of U */ 1089 ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 1090 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 1091 jl = il + mbs; 1092 for (i=0; i<mbs; i++) { 1093 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1094 } 1095 1096 /* for each row k */ 1097 for (k = 0; k<mbs; k++){ 1098 1099 /*initialize k-th row with elements nonzero in row perm(k) of A */ 1100 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1101 1102 for (j = jmin; j < jmax; j++){ 1103 vj = rip[aj[j]]; 1104 rtmp[vj] = aa[j]; 1105 } 1106 1107 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1108 dk = rtmp[k]; 1109 i = jl[k]; /* first row to be added to k_th row */ 1110 1111 while (i < k){ 1112 nexti = jl[i]; /* next row to be added to k_th row */ 1113 1114 /* compute multiplier, update D(k) and U(i,k) */ 1115 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1116 uikdi = - ba[ili]*ba[i]; 1117 dk += uikdi*ba[ili]; 1118 ba[ili] = uikdi; /* -U(i,k) */ 1119 1120 /* add multiple of row i to k-th row ... */ 1121 jmin = ili + 1; jmax = bi[i+1]; 1122 if (jmin < jmax){ 1123 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1124 /* ... add i to row list for next nonzero entry */ 1125 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1126 j = bj[jmin]; 1127 jl[i] = jl[j]; jl[j] = i; /* update jl */ 1128 } 1129 i = nexti; 1130 } 1131 1132 /* check for zero pivot and save diagoanl element */ 1133 if (dk == 0.0){ 1134 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 1135 /* 1136 } else if (PetscRealPart(dk) < 0.0){ 1137 SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %g\n",k,dk); 1138 */ 1139 } 1140 1141 /* save nonzero entries in k-th row of U ... */ 1142 ba[k] = 1.0/dk; 1143 jmin = bi[k]; jmax = bi[k+1]; 1144 if (jmin < jmax) { 1145 for (j=jmin; j<jmax; j++){ 1146 vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0; 1147 } 1148 /* ... add k to row list for first nonzero entry in k-th row */ 1149 il[k] = jmin; 1150 i = bj[jmin]; 1151 jl[k] = jl[i]; jl[i] = k; 1152 } 1153 } 1154 1155 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1156 ierr = PetscFree(il);CHKERRQ(ierr); 1157 if (a->permute){ 1158 ierr = PetscFree(aa);CHKERRQ(ierr); 1159 } 1160 1161 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1162 C->factor = FACTOR_CHOLESKY; 1163 C->assembled = PETSC_TRUE; 1164 C->preallocated = PETSC_TRUE; 1165 PetscLogFlops(b->mbs); 1166 PetscFunctionReturn(0); 1167 } 1168 1169 /* 1170 Version for when blocks are 1 by 1 Using natural ordering 1171 */ 1172 #undef __FUNCT__ 1173 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering" 1174 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B) 1175 { 1176 Mat C = *B; 1177 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data; 1178 PetscErrorCode ierr; 1179 int i,j,mbs = a->mbs; 1180 int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 1181 int k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz,ndamp = 0; 1182 MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval; 1183 PetscReal damping=b->factor_damping, zeropivot=b->factor_zeropivot,shift_amount; 1184 PetscTruth damp,chshift; 1185 int nshift=0; 1186 1187 PetscFunctionBegin; 1188 /* initialization */ 1189 /* il and jl record the first nonzero element in each row of the accessing 1190 window U(0:k, k:mbs-1). 1191 jl: list of rows to be added to uneliminated rows 1192 i>= k: jl(i) is the first row to be added to row i 1193 i< k: jl(i) is the row following row i in some list of rows 1194 jl(i) = mbs indicates the end of a list 1195 il(i): points to the first nonzero element in U(i,k:mbs-1) 1196 */ 1197 ierr = PetscMalloc(mbs*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 1198 ierr = PetscMalloc(2*mbs*sizeof(int),&il);CHKERRQ(ierr); 1199 jl = il + mbs; 1200 1201 shift_amount = 0; 1202 do { 1203 damp = PETSC_FALSE; 1204 chshift = PETSC_FALSE; 1205 for (i=0; i<mbs; i++) { 1206 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1207 } 1208 1209 for (k = 0; k<mbs; k++){ /* row k */ 1210 /*initialize k-th row with elements nonzero in row perm(k) of A */ 1211 nz = ai[k+1] - ai[k]; 1212 acol = aj + ai[k]; 1213 aval = aa + ai[k]; 1214 bval = ba + bi[k]; 1215 while (nz -- ){ 1216 rtmp[*acol++] = *aval++; 1217 *bval++ = 0.0; /* for in-place factorization */ 1218 } 1219 /* damp the diagonal of the matrix */ 1220 if (ndamp||nshift) rtmp[k] += damping+shift_amount; 1221 1222 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1223 dk = rtmp[k]; 1224 i = jl[k]; /* first row to be added to k_th row */ 1225 1226 while (i < k){ 1227 nexti = jl[i]; /* next row to be added to k_th row */ 1228 1229 /* compute multiplier, update D(k) and U(i,k) */ 1230 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1231 uikdi = - ba[ili]*ba[bi[i]]; 1232 dk += uikdi*ba[ili]; 1233 ba[ili] = uikdi; /* -U(i,k) */ 1234 1235 /* add multiple of row i to k-th row ... */ 1236 jmin = ili + 1; 1237 nz = bi[i+1] - jmin; 1238 if (nz > 0){ 1239 bcol = bj + jmin; 1240 bval = ba + jmin; 1241 while (nz --) rtmp[*bcol++] += uikdi*(*bval++); 1242 /* ... add i to row list for next nonzero entry */ 1243 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1244 j = bj[jmin]; 1245 jl[i] = jl[j]; jl[j] = i; /* update jl */ 1246 } 1247 i = nexti; 1248 } 1249 1250 if (PetscRealPart(dk) < zeropivot && b->factor_shift){ 1251 /* calculate a shift that would make this row diagonally dominant */ 1252 PetscReal rs = PetscAbs(PetscRealPart(dk)); 1253 jmin = bi[k]+1; 1254 nz = bi[k+1] - jmin; 1255 if (nz){ 1256 bcol = bj + jmin; 1257 bval = ba + jmin; 1258 while (nz--){ 1259 rs += PetscAbsScalar(rtmp[*bcol++]); 1260 } 1261 } 1262 /* if this shift is less than the previous, just up the previous 1263 one by a bit */ 1264 shift_amount = PetscMax(rs,1.1*shift_amount); 1265 chshift = PETSC_TRUE; 1266 /* Unlike in the ILU case there is no exit condition on nshift: 1267 we increase the shift until it converges. There is no guarantee that 1268 this algorithm converges faster or slower, or is better or worse 1269 than the ILU algorithm. */ 1270 nshift++; 1271 break; 1272 } 1273 if (PetscRealPart(dk) < zeropivot){ 1274 if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1); 1275 if (damping > 0.0) { 1276 if (ndamp) damping *= 2.0; 1277 damp = PETSC_TRUE; 1278 ndamp++; 1279 break; 1280 } else if (PetscAbsScalar(dk) < zeropivot){ 1281 SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g",k,PetscRealPart(dk),zeropivot); 1282 } else { 1283 PetscLogInfo((PetscObject)A,"Negative pivot %g in row %d of Cholesky factorization\n",PetscRealPart(dk),k); 1284 } 1285 } 1286 1287 /* save nonzero entries in k-th row of U ... */ 1288 /* printf("%d, dk: %g, 1/dk: %g\n",k,dk,1/dk); */ 1289 ba[bi[k]] = 1.0/dk; 1290 jmin = bi[k]+1; 1291 nz = bi[k+1] - jmin; 1292 if (nz){ 1293 bcol = bj + jmin; 1294 bval = ba + jmin; 1295 while (nz--){ 1296 *bval++ = rtmp[*bcol]; 1297 rtmp[*bcol++] = 0.0; 1298 } 1299 /* ... add k to row list for first nonzero entry in k-th row */ 1300 il[k] = jmin; 1301 i = bj[jmin]; 1302 jl[k] = jl[i]; jl[i] = k; 1303 } 1304 } /* end of for (k = 0; k<mbs; k++) */ 1305 } while (damp||chshift); 1306 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1307 ierr = PetscFree(il);CHKERRQ(ierr); 1308 1309 C->factor = FACTOR_CHOLESKY; 1310 C->assembled = PETSC_TRUE; 1311 C->preallocated = PETSC_TRUE; 1312 PetscLogFlops(b->mbs); 1313 if (ndamp) { 1314 PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqSBAIJ_1_NaturalOrdering: number of damping tries %d damping value %g\n",ndamp,damping); 1315 } 1316 if (nshift) { 1317 PetscLogInfo(0,"MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering diagonal shifted %d shifts\n",nshift); 1318 } 1319 1320 PetscFunctionReturn(0); 1321 } 1322 1323 #undef __FUNCT__ 1324 #define __FUNCT__ "MatCholeskyFactor_SeqSBAIJ" 1325 PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info) 1326 { 1327 PetscErrorCode ierr; 1328 Mat C; 1329 1330 PetscFunctionBegin; 1331 ierr = MatCholeskyFactorSymbolic(A,perm,info,&C);CHKERRQ(ierr); 1332 ierr = MatCholeskyFactorNumeric(A,&C);CHKERRQ(ierr); 1333 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1334 PetscFunctionReturn(0); 1335 } 1336 1337 1338