1 2 /* 3 Factorization code for BAIJ format. 4 */ 5 6 #include <../src/mat/impls/baij/seq/baij.h> 7 #include <petsc/private/kernels/blockinvert.h> 8 #include <petscbt.h> 9 #include <../src/mat/utils/freespace.h> 10 11 /* ----------------------------------------------------------------*/ 12 extern PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat,Mat,MatDuplicateOption,PetscBool); 13 14 #undef __FUNCT__ 15 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering" 16 /* 17 This is not much faster than MatLUFactorNumeric_SeqBAIJ_N() but the solve is faster at least sometimes 18 */ 19 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering(Mat B,Mat A,const MatFactorInfo *info) 20 { 21 Mat C =B; 22 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)C->data; 23 PetscErrorCode ierr; 24 PetscInt i,j,k,ipvt[15]; 25 const PetscInt n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*ajtmp,*bjtmp,*bdiag=b->diag,*pj; 26 PetscInt nz,nzL,row; 27 MatScalar *rtmp,*pc,*mwork,*pv,*vv,work[225]; 28 const MatScalar *v,*aa=a->a; 29 PetscInt bs2 = a->bs2,bs=A->rmap->bs,flg; 30 PetscInt sol_ver; 31 32 PetscFunctionBegin; 33 ierr = PetscOptionsGetInt(((PetscObject)A)->prefix,"-sol_ver",&sol_ver,NULL);CHKERRQ(ierr); 34 35 /* generate work space needed by the factorization */ 36 ierr = PetscMalloc2(bs2*n,&rtmp,bs2,&mwork);CHKERRQ(ierr); 37 ierr = PetscMemzero(rtmp,bs2*n*sizeof(MatScalar));CHKERRQ(ierr); 38 39 for (i=0; i<n; i++) { 40 /* zero rtmp */ 41 /* L part */ 42 nz = bi[i+1] - bi[i]; 43 bjtmp = bj + bi[i]; 44 for (j=0; j<nz; j++) { 45 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 46 } 47 48 /* U part */ 49 nz = bdiag[i] - bdiag[i+1]; 50 bjtmp = bj + bdiag[i+1]+1; 51 for (j=0; j<nz; j++) { 52 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 53 } 54 55 /* load in initial (unfactored row) */ 56 nz = ai[i+1] - ai[i]; 57 ajtmp = aj + ai[i]; 58 v = aa + bs2*ai[i]; 59 for (j=0; j<nz; j++) { 60 ierr = PetscMemcpy(rtmp+bs2*ajtmp[j],v+bs2*j,bs2*sizeof(MatScalar));CHKERRQ(ierr); 61 } 62 63 /* elimination */ 64 bjtmp = bj + bi[i]; 65 nzL = bi[i+1] - bi[i]; 66 for (k=0; k < nzL; k++) { 67 row = bjtmp[k]; 68 pc = rtmp + bs2*row; 69 for (flg=0,j=0; j<bs2; j++) { 70 if (pc[j]!=0.0) { 71 flg = 1; 72 break; 73 } 74 } 75 if (flg) { 76 pv = b->a + bs2*bdiag[row]; 77 PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); 78 /*ierr = PetscKernel_A_gets_A_times_B_15(pc,pv,mwork);CHKERRQ(ierr);*/ 79 pj = b->j + bdiag[row+1]+1; /* begining of U(row,:) */ 80 pv = b->a + bs2*(bdiag[row+1]+1); 81 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries inU(row,:), excluding diag */ 82 for (j=0; j<nz; j++) { 83 vv = rtmp + bs2*pj[j]; 84 PetscKernel_A_gets_A_minus_B_times_C(bs,vv,pc,pv); 85 /* ierr = PetscKernel_A_gets_A_minus_B_times_C_15(vv,pc,pv);CHKERRQ(ierr); */ 86 pv += bs2; 87 } 88 ierr = PetscLogFlops(2*bs2*bs*(nz+1)-bs2);CHKERRQ(ierr); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */ 89 } 90 } 91 92 /* finished row so stick it into b->a */ 93 /* L part */ 94 pv = b->a + bs2*bi[i]; 95 pj = b->j + bi[i]; 96 nz = bi[i+1] - bi[i]; 97 for (j=0; j<nz; j++) { 98 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 99 } 100 101 /* Mark diagonal and invert diagonal for simplier triangular solves */ 102 pv = b->a + bs2*bdiag[i]; 103 pj = b->j + bdiag[i]; 104 ierr = PetscMemcpy(pv,rtmp+bs2*pj[0],bs2*sizeof(MatScalar));CHKERRQ(ierr); 105 /* PetscKernel_A_gets_inverse_A(bs,pv,pivots,work); */ 106 ierr = PetscKernel_A_gets_inverse_A_15(pv,ipvt,work,info->shiftamount);CHKERRQ(ierr); 107 108 /* U part */ 109 pv = b->a + bs2*(bdiag[i+1]+1); 110 pj = b->j + bdiag[i+1]+1; 111 nz = bdiag[i] - bdiag[i+1] - 1; 112 for (j=0; j<nz; j++) { 113 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 114 } 115 } 116 117 ierr = PetscFree2(rtmp,mwork);CHKERRQ(ierr); 118 119 C->ops->solve = MatSolve_SeqBAIJ_15_NaturalOrdering_ver1; 120 C->ops->solvetranspose = MatSolve_SeqBAIJ_N_NaturalOrdering; 121 C->assembled = PETSC_TRUE; 122 123 ierr = PetscLogFlops(1.333333333333*bs*bs2*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_N" 129 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_N(Mat B,Mat A,const MatFactorInfo *info) 130 { 131 Mat C =B; 132 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)C->data; 133 IS isrow = b->row,isicol = b->icol; 134 PetscErrorCode ierr; 135 const PetscInt *r,*ic; 136 PetscInt i,j,k,n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 137 PetscInt *ajtmp,*bjtmp,nz,nzL,row,*bdiag=b->diag,*pj; 138 MatScalar *rtmp,*pc,*mwork,*v,*pv,*aa=a->a; 139 PetscInt bs=A->rmap->bs,bs2 = a->bs2,*v_pivots,flg; 140 MatScalar *v_work; 141 PetscBool col_identity,row_identity,both_identity; 142 143 PetscFunctionBegin; 144 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 145 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 146 147 ierr = PetscMalloc1(bs2*n,&rtmp);CHKERRQ(ierr); 148 ierr = PetscMemzero(rtmp,bs2*n*sizeof(MatScalar));CHKERRQ(ierr); 149 150 /* generate work space needed by dense LU factorization */ 151 ierr = PetscMalloc3(bs,&v_work,bs2,&mwork,bs,&v_pivots);CHKERRQ(ierr); 152 153 for (i=0; i<n; i++) { 154 /* zero rtmp */ 155 /* L part */ 156 nz = bi[i+1] - bi[i]; 157 bjtmp = bj + bi[i]; 158 for (j=0; j<nz; j++) { 159 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 160 } 161 162 /* U part */ 163 nz = bdiag[i] - bdiag[i+1]; 164 bjtmp = bj + bdiag[i+1]+1; 165 for (j=0; j<nz; j++) { 166 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 167 } 168 169 /* load in initial (unfactored row) */ 170 nz = ai[r[i]+1] - ai[r[i]]; 171 ajtmp = aj + ai[r[i]]; 172 v = aa + bs2*ai[r[i]]; 173 for (j=0; j<nz; j++) { 174 ierr = PetscMemcpy(rtmp+bs2*ic[ajtmp[j]],v+bs2*j,bs2*sizeof(MatScalar));CHKERRQ(ierr); 175 } 176 177 /* elimination */ 178 bjtmp = bj + bi[i]; 179 nzL = bi[i+1] - bi[i]; 180 for (k=0; k < nzL; k++) { 181 row = bjtmp[k]; 182 pc = rtmp + bs2*row; 183 for (flg=0,j=0; j<bs2; j++) { 184 if (pc[j]!=0.0) { 185 flg = 1; 186 break; 187 } 188 } 189 if (flg) { 190 pv = b->a + bs2*bdiag[row]; 191 PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); /* *pc = *pc * (*pv); */ 192 pj = b->j + bdiag[row+1]+1; /* begining of U(row,:) */ 193 pv = b->a + bs2*(bdiag[row+1]+1); 194 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries inU(row,:), excluding diag */ 195 for (j=0; j<nz; j++) { 196 PetscKernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); 197 } 198 ierr = PetscLogFlops(2*bs2*bs*(nz+1)-bs2);CHKERRQ(ierr); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */ 199 } 200 } 201 202 /* finished row so stick it into b->a */ 203 /* L part */ 204 pv = b->a + bs2*bi[i]; 205 pj = b->j + bi[i]; 206 nz = bi[i+1] - bi[i]; 207 for (j=0; j<nz; j++) { 208 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 209 } 210 211 /* Mark diagonal and invert diagonal for simplier triangular solves */ 212 pv = b->a + bs2*bdiag[i]; 213 pj = b->j + bdiag[i]; 214 /* if (*pj != i)SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"row %d != *pj %d",i,*pj); */ 215 ierr = PetscMemcpy(pv,rtmp+bs2*pj[0],bs2*sizeof(MatScalar));CHKERRQ(ierr); 216 ierr = PetscKernel_A_gets_inverse_A(bs,pv,v_pivots,v_work);CHKERRQ(ierr); 217 218 /* U part */ 219 pv = b->a + bs2*(bdiag[i+1]+1); 220 pj = b->j + bdiag[i+1]+1; 221 nz = bdiag[i] - bdiag[i+1] - 1; 222 for (j=0; j<nz; j++) { 223 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 224 } 225 } 226 227 ierr = PetscFree(rtmp);CHKERRQ(ierr); 228 ierr = PetscFree3(v_work,mwork,v_pivots);CHKERRQ(ierr); 229 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 230 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 231 232 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 233 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 234 235 both_identity = (PetscBool) (row_identity && col_identity); 236 if (both_identity) { 237 C->ops->solve = MatSolve_SeqBAIJ_N_NaturalOrdering; 238 } else { 239 C->ops->solve = MatSolve_SeqBAIJ_N; 240 } 241 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_N; 242 243 C->assembled = PETSC_TRUE; 244 245 ierr = PetscLogFlops(1.333333333333*bs*bs2*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 246 PetscFunctionReturn(0); 247 } 248 249 /* 250 ilu(0) with natural ordering under new data structure. 251 See MatILUFactorSymbolic_SeqAIJ_ilu0() for detailed description 252 because this code is almost identical to MatILUFactorSymbolic_SeqAIJ_ilu0_inplace(). 253 */ 254 255 #undef __FUNCT__ 256 #define __FUNCT__ "MatILUFactorSymbolic_SeqBAIJ_ilu0" 257 PetscErrorCode MatILUFactorSymbolic_SeqBAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 258 { 259 260 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 261 PetscErrorCode ierr; 262 PetscInt n=a->mbs,*ai=a->i,*aj,*adiag=a->diag,bs2 = a->bs2; 263 PetscInt i,j,nz,*bi,*bj,*bdiag,bi_temp; 264 265 PetscFunctionBegin; 266 ierr = MatDuplicateNoCreate_SeqBAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 267 b = (Mat_SeqBAIJ*)(fact)->data; 268 269 /* allocate matrix arrays for new data structure */ 270 ierr = PetscMalloc3(bs2*ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);CHKERRQ(ierr); 271 ierr = PetscLogObjectMemory((PetscObject)fact,ai[n]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 272 273 b->singlemalloc = PETSC_TRUE; 274 b->free_a = PETSC_TRUE; 275 b->free_ij = PETSC_TRUE; 276 fact->preallocated = PETSC_TRUE; 277 fact->assembled = PETSC_TRUE; 278 if (!b->diag) { 279 ierr = PetscMalloc1(n+1,&b->diag);CHKERRQ(ierr); 280 ierr = PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 281 } 282 bdiag = b->diag; 283 284 if (n > 0) { 285 ierr = PetscMemzero(b->a,bs2*ai[n]*sizeof(MatScalar));CHKERRQ(ierr); 286 } 287 288 /* set bi and bj with new data structure */ 289 bi = b->i; 290 bj = b->j; 291 292 /* L part */ 293 bi[0] = 0; 294 for (i=0; i<n; i++) { 295 nz = adiag[i] - ai[i]; 296 bi[i+1] = bi[i] + nz; 297 aj = a->j + ai[i]; 298 for (j=0; j<nz; j++) { 299 *bj = aj[j]; bj++; 300 } 301 } 302 303 /* U part */ 304 bi_temp = bi[n]; 305 bdiag[n] = bi[n]-1; 306 for (i=n-1; i>=0; i--) { 307 nz = ai[i+1] - adiag[i] - 1; 308 bi_temp = bi_temp + nz + 1; 309 aj = a->j + adiag[i] + 1; 310 for (j=0; j<nz; j++) { 311 *bj = aj[j]; bj++; 312 } 313 /* diag[i] */ 314 *bj = i; bj++; 315 bdiag[i] = bi_temp - 1; 316 } 317 PetscFunctionReturn(0); 318 } 319 320 #undef __FUNCT__ 321 #define __FUNCT__ "MatILUFactorSymbolic_SeqBAIJ" 322 PetscErrorCode MatILUFactorSymbolic_SeqBAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 323 { 324 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 325 IS isicol; 326 PetscErrorCode ierr; 327 const PetscInt *r,*ic; 328 PetscInt n=a->mbs,*ai=a->i,*aj=a->j,d; 329 PetscInt *bi,*cols,nnz,*cols_lvl; 330 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 331 PetscInt i,levels,diagonal_fill; 332 PetscBool col_identity,row_identity,both_identity; 333 PetscReal f; 334 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 335 PetscBT lnkbt; 336 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 337 PetscFreeSpaceList free_space =NULL,current_space=NULL; 338 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 339 PetscBool missing; 340 PetscInt bs=A->rmap->bs,bs2=a->bs2; 341 342 PetscFunctionBegin; 343 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 344 if (bs>1) { /* check shifttype */ 345 if (info->shifttype == MAT_SHIFT_NONZERO || info->shifttype == MAT_SHIFT_POSITIVE_DEFINITE) 346 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix"); 347 } 348 349 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 350 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 351 352 f = info->fill; 353 levels = (PetscInt)info->levels; 354 diagonal_fill = (PetscInt)info->diagonal_fill; 355 356 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 357 358 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 359 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 360 361 both_identity = (PetscBool) (row_identity && col_identity); 362 363 if (!levels && both_identity) { 364 /* special case: ilu(0) with natural ordering */ 365 ierr = MatILUFactorSymbolic_SeqBAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr); 366 ierr = MatSeqBAIJSetNumericFactorization(fact,both_identity);CHKERRQ(ierr); 367 368 fact->factortype = MAT_FACTOR_ILU; 369 (fact)->info.factor_mallocs = 0; 370 (fact)->info.fill_ratio_given = info->fill; 371 (fact)->info.fill_ratio_needed = 1.0; 372 373 b = (Mat_SeqBAIJ*)(fact)->data; 374 b->row = isrow; 375 b->col = iscol; 376 b->icol = isicol; 377 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 378 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 379 b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 380 381 ierr = PetscMalloc1((n+1)*bs,&b->solve_work);CHKERRQ(ierr); 382 PetscFunctionReturn(0); 383 } 384 385 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 386 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 387 388 /* get new row pointers */ 389 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 390 bi[0] = 0; 391 /* bdiag is location of diagonal in factor */ 392 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 393 bdiag[0] = 0; 394 395 ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr); 396 397 /* create a linked list for storing column indices of the active row */ 398 nlnk = n + 1; 399 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 400 401 /* initial FreeSpace size is f*(ai[n]+1) */ 402 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 403 current_space = free_space; 404 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 405 current_space_lvl = free_space_lvl; 406 407 for (i=0; i<n; i++) { 408 nzi = 0; 409 /* copy current row into linked list */ 410 nnz = ai[r[i]+1] - ai[r[i]]; 411 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 412 cols = aj + ai[r[i]]; 413 lnk[i] = -1; /* marker to indicate if diagonal exists */ 414 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 415 nzi += nlnk; 416 417 /* make sure diagonal entry is included */ 418 if (diagonal_fill && lnk[i] == -1) { 419 fm = n; 420 while (lnk[fm] < i) fm = lnk[fm]; 421 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 422 lnk[fm] = i; 423 lnk_lvl[i] = 0; 424 nzi++; dcount++; 425 } 426 427 /* add pivot rows into the active row */ 428 nzbd = 0; 429 prow = lnk[n]; 430 while (prow < i) { 431 nnz = bdiag[prow]; 432 cols = bj_ptr[prow] + nnz + 1; 433 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 434 nnz = bi[prow+1] - bi[prow] - nnz - 1; 435 436 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 437 nzi += nlnk; 438 prow = lnk[prow]; 439 nzbd++; 440 } 441 bdiag[i] = nzbd; 442 bi[i+1] = bi[i] + nzi; 443 444 /* if free space is not available, make more free space */ 445 if (current_space->local_remaining<nzi) { 446 nnz = 2*nzi*(n - i); /* estimated and max additional space needed */ 447 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 448 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 449 reallocs++; 450 } 451 452 /* copy data into free_space and free_space_lvl, then initialize lnk */ 453 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 454 455 bj_ptr[i] = current_space->array; 456 bjlvl_ptr[i] = current_space_lvl->array; 457 458 /* make sure the active row i has diagonal entry */ 459 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 460 461 current_space->array += nzi; 462 current_space->local_used += nzi; 463 current_space->local_remaining -= nzi; 464 465 current_space_lvl->array += nzi; 466 current_space_lvl->local_used += nzi; 467 current_space_lvl->local_remaining -= nzi; 468 } 469 470 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 471 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 472 473 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 474 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 475 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 476 477 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 478 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 479 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 480 481 #if defined(PETSC_USE_INFO) 482 { 483 PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 484 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 485 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 486 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr); 487 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 488 if (diagonal_fill) { 489 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr); 490 } 491 } 492 #endif 493 494 /* put together the new matrix */ 495 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(fact,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 496 ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr); 497 498 b = (Mat_SeqBAIJ*)(fact)->data; 499 b->free_a = PETSC_TRUE; 500 b->free_ij = PETSC_TRUE; 501 b->singlemalloc = PETSC_FALSE; 502 503 ierr = PetscMalloc1(bs2*(bdiag[0]+1),&b->a);CHKERRQ(ierr); 504 505 b->j = bj; 506 b->i = bi; 507 b->diag = bdiag; 508 b->free_diag = PETSC_TRUE; 509 b->ilen = 0; 510 b->imax = 0; 511 b->row = isrow; 512 b->col = iscol; 513 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 514 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 515 b->icol = isicol; 516 517 ierr = PetscMalloc1(bs*n+bs,&b->solve_work);CHKERRQ(ierr); 518 /* In b structure: Free imax, ilen, old a, old j. 519 Allocate bdiag, solve_work, new a, new j */ 520 ierr = PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1) * (sizeof(PetscInt)+bs2*sizeof(PetscScalar)));CHKERRQ(ierr); 521 b->maxnz = b->nz = bdiag[0]+1; 522 523 fact->info.factor_mallocs = reallocs; 524 fact->info.fill_ratio_given = f; 525 fact->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 526 527 ierr = MatSeqBAIJSetNumericFactorization(fact,both_identity);CHKERRQ(ierr); 528 PetscFunctionReturn(0); 529 } 530 531 /* 532 This code is virtually identical to MatILUFactorSymbolic_SeqAIJ 533 except that the data structure of Mat_SeqAIJ is slightly different. 534 Not a good example of code reuse. 535 */ 536 #undef __FUNCT__ 537 #define __FUNCT__ "MatILUFactorSymbolic_SeqBAIJ_inplace" 538 PetscErrorCode MatILUFactorSymbolic_SeqBAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 539 { 540 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 541 IS isicol; 542 PetscErrorCode ierr; 543 const PetscInt *r,*ic,*ai = a->i,*aj = a->j,*xi; 544 PetscInt prow,n = a->mbs,*ainew,*ajnew,jmax,*fill,nz,*im,*ajfill,*flev,*xitmp; 545 PetscInt *dloc,idx,row,m,fm,nzf,nzi,reallocate = 0,dcount = 0; 546 PetscInt incrlev,nnz,i,bs = A->rmap->bs,bs2 = a->bs2,levels,diagonal_fill,dd; 547 PetscBool col_identity,row_identity,both_identity,flg; 548 PetscReal f; 549 550 PetscFunctionBegin; 551 ierr = MatMissingDiagonal_SeqBAIJ(A,&flg,&dd);CHKERRQ(ierr); 552 if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix A is missing diagonal entry in row %D",dd); 553 554 f = info->fill; 555 levels = (PetscInt)info->levels; 556 diagonal_fill = (PetscInt)info->diagonal_fill; 557 558 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 559 560 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 561 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 562 both_identity = (PetscBool) (row_identity && col_identity); 563 564 if (!levels && both_identity) { /* special case copy the nonzero structure */ 565 ierr = MatDuplicateNoCreate_SeqBAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 566 ierr = MatSeqBAIJSetNumericFactorization_inplace(fact,both_identity);CHKERRQ(ierr); 567 568 fact->factortype = MAT_FACTOR_ILU; 569 b = (Mat_SeqBAIJ*)fact->data; 570 b->row = isrow; 571 b->col = iscol; 572 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 573 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 574 b->icol = isicol; 575 b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 576 577 ierr = PetscMalloc1((n+1)*bs,&b->solve_work);CHKERRQ(ierr); 578 PetscFunctionReturn(0); 579 } 580 581 /* general case perform the symbolic factorization */ 582 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 583 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 584 585 /* get new row pointers */ 586 ierr = PetscMalloc1(n+1,&ainew);CHKERRQ(ierr); 587 ainew[0] = 0; 588 /* don't know how many column pointers are needed so estimate */ 589 jmax = (PetscInt)(f*ai[n] + 1); 590 ierr = PetscMalloc1(jmax,&ajnew);CHKERRQ(ierr); 591 /* ajfill is level of fill for each fill entry */ 592 ierr = PetscMalloc1(jmax,&ajfill);CHKERRQ(ierr); 593 /* fill is a linked list of nonzeros in active row */ 594 ierr = PetscMalloc1(n+1,&fill);CHKERRQ(ierr); 595 /* im is level for each filled value */ 596 ierr = PetscMalloc1(n+1,&im);CHKERRQ(ierr); 597 /* dloc is location of diagonal in factor */ 598 ierr = PetscMalloc1(n+1,&dloc);CHKERRQ(ierr); 599 dloc[0] = 0; 600 for (prow=0; prow<n; prow++) { 601 602 /* copy prow into linked list */ 603 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 604 if (!nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[prow],prow); 605 xi = aj + ai[r[prow]]; 606 fill[n] = n; 607 fill[prow] = -1; /* marker for diagonal entry */ 608 while (nz--) { 609 fm = n; 610 idx = ic[*xi++]; 611 do { 612 m = fm; 613 fm = fill[m]; 614 } while (fm < idx); 615 fill[m] = idx; 616 fill[idx] = fm; 617 im[idx] = 0; 618 } 619 620 /* make sure diagonal entry is included */ 621 if (diagonal_fill && fill[prow] == -1) { 622 fm = n; 623 while (fill[fm] < prow) fm = fill[fm]; 624 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 625 fill[fm] = prow; 626 im[prow] = 0; 627 nzf++; 628 dcount++; 629 } 630 631 nzi = 0; 632 row = fill[n]; 633 while (row < prow) { 634 incrlev = im[row] + 1; 635 nz = dloc[row]; 636 xi = ajnew + ainew[row] + nz + 1; 637 flev = ajfill + ainew[row] + nz + 1; 638 nnz = ainew[row+1] - ainew[row] - nz - 1; 639 fm = row; 640 while (nnz-- > 0) { 641 idx = *xi++; 642 if (*flev + incrlev > levels) { 643 flev++; 644 continue; 645 } 646 do { 647 m = fm; 648 fm = fill[m]; 649 } while (fm < idx); 650 if (fm != idx) { 651 im[idx] = *flev + incrlev; 652 fill[m] = idx; 653 fill[idx] = fm; 654 fm = idx; 655 nzf++; 656 } else if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 657 flev++; 658 } 659 row = fill[row]; 660 nzi++; 661 } 662 /* copy new filled row into permanent storage */ 663 ainew[prow+1] = ainew[prow] + nzf; 664 if (ainew[prow+1] > jmax) { 665 666 /* estimate how much additional space we will need */ 667 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 668 /* just double the memory each time */ 669 PetscInt maxadd = jmax; 670 /* maxadd = (int)(((f*ai[n]+1)*(n-prow+5))/n); */ 671 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 672 jmax += maxadd; 673 674 /* allocate a longer ajnew and ajfill */ 675 ierr = PetscMalloc1(jmax,&xitmp);CHKERRQ(ierr); 676 ierr = PetscMemcpy(xitmp,ajnew,ainew[prow]*sizeof(PetscInt));CHKERRQ(ierr); 677 ierr = PetscFree(ajnew);CHKERRQ(ierr); 678 ajnew = xitmp; 679 ierr = PetscMalloc1(jmax,&xitmp);CHKERRQ(ierr); 680 ierr = PetscMemcpy(xitmp,ajfill,ainew[prow]*sizeof(PetscInt));CHKERRQ(ierr); 681 ierr = PetscFree(ajfill);CHKERRQ(ierr); 682 ajfill = xitmp; 683 reallocate++; /* count how many reallocations are needed */ 684 } 685 xitmp = ajnew + ainew[prow]; 686 flev = ajfill + ainew[prow]; 687 dloc[prow] = nzi; 688 fm = fill[n]; 689 while (nzf--) { 690 *xitmp++ = fm; 691 *flev++ = im[fm]; 692 fm = fill[fm]; 693 } 694 /* make sure row has diagonal entry */ 695 if (ajnew[ainew[prow]+dloc[prow]] != prow) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 696 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",prow); 697 } 698 ierr = PetscFree(ajfill);CHKERRQ(ierr); 699 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 700 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 701 ierr = PetscFree(fill);CHKERRQ(ierr); 702 ierr = PetscFree(im);CHKERRQ(ierr); 703 704 #if defined(PETSC_USE_INFO) 705 { 706 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 707 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocate,(double)f,(double)af);CHKERRQ(ierr); 708 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 709 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); 710 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 711 if (diagonal_fill) { 712 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr); 713 } 714 } 715 #endif 716 717 /* put together the new matrix */ 718 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(fact,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 719 ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr); 720 b = (Mat_SeqBAIJ*)fact->data; 721 722 b->free_a = PETSC_TRUE; 723 b->free_ij = PETSC_TRUE; 724 b->singlemalloc = PETSC_FALSE; 725 726 ierr = PetscMalloc1(bs2*ainew[n],&b->a);CHKERRQ(ierr); 727 728 b->j = ajnew; 729 b->i = ainew; 730 for (i=0; i<n; i++) dloc[i] += ainew[i]; 731 b->diag = dloc; 732 b->free_diag = PETSC_TRUE; 733 b->ilen = 0; 734 b->imax = 0; 735 b->row = isrow; 736 b->col = iscol; 737 b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 738 739 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 740 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 741 b->icol = isicol; 742 ierr = PetscMalloc1(bs*n+bs,&b->solve_work);CHKERRQ(ierr); 743 /* In b structure: Free imax, ilen, old a, old j. 744 Allocate dloc, solve_work, new a, new j */ 745 ierr = PetscLogObjectMemory((PetscObject)fact,(ainew[n]-n)*(sizeof(PetscInt))+bs2*ainew[n]*sizeof(PetscScalar));CHKERRQ(ierr); 746 b->maxnz = b->nz = ainew[n]; 747 748 fact->info.factor_mallocs = reallocate; 749 fact->info.fill_ratio_given = f; 750 fact->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 751 752 ierr = MatSeqBAIJSetNumericFactorization_inplace(fact,both_identity);CHKERRQ(ierr); 753 PetscFunctionReturn(0); 754 } 755 756 #undef __FUNCT__ 757 #define __FUNCT__ "MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE" 758 PetscErrorCode MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE(Mat A) 759 { 760 /* Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; */ 761 /* int i,*AJ=a->j,nz=a->nz; */ 762 763 PetscFunctionBegin; 764 /* Undo Column scaling */ 765 /* while (nz--) { */ 766 /* AJ[i] = AJ[i]/4; */ 767 /* } */ 768 /* This should really invoke a push/pop logic, but we don't have that yet. */ 769 A->ops->setunfactored = NULL; 770 PetscFunctionReturn(0); 771 } 772 773 #undef __FUNCT__ 774 #define __FUNCT__ "MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj" 775 PetscErrorCode MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj(Mat A) 776 { 777 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 778 PetscInt *AJ=a->j,nz=a->nz; 779 unsigned short *aj=(unsigned short*)AJ; 780 781 PetscFunctionBegin; 782 /* Is this really necessary? */ 783 while (nz--) { 784 AJ[nz] = (int)((unsigned int)aj[nz]); /* First extend, then convert to signed. */ 785 } 786 A->ops->setunfactored = NULL; 787 PetscFunctionReturn(0); 788 } 789 790 791