1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscbt.h> 5 #include <../src/mat/utils/freespace.h> 6 7 EXTERN_C_BEGIN 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ" 10 /* 11 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 12 13 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 14 */ 15 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 16 { 17 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data; 18 PetscErrorCode ierr; 19 PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order; 20 const PetscInt *ai = a->i, *aj = a->j; 21 const PetscScalar *aa = a->a; 22 PetscBool *done; 23 PetscReal best,past = 0,future; 24 25 PetscFunctionBegin; 26 /* pick initial row */ 27 best = -1; 28 for (i=0; i<n; i++) { 29 future = 0.0; 30 for (j=ai[i]; j<ai[i+1]; j++) { 31 if (aj[j] != i) future += PetscAbsScalar(aa[j]); else past = PetscAbsScalar(aa[j]); 32 } 33 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 34 if (past/future > best) { 35 best = past/future; 36 current = i; 37 } 38 } 39 40 ierr = PetscMalloc(n*sizeof(PetscBool),&done);CHKERRQ(ierr); 41 ierr = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr); 42 ierr = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr); 43 order[0] = current; 44 for (i=0; i<n-1; i++) { 45 done[current] = PETSC_TRUE; 46 best = -1; 47 /* loop over all neighbors of current pivot */ 48 for (j=ai[current]; j<ai[current+1]; j++) { 49 jj = aj[j]; 50 if (done[jj]) continue; 51 /* loop over columns of potential next row computing weights for below and above diagonal */ 52 past = future = 0.0; 53 for (k=ai[jj]; k<ai[jj+1]; k++) { 54 kk = aj[k]; 55 if (done[kk]) past += PetscAbsScalar(aa[k]); 56 else if (kk != jj) future += PetscAbsScalar(aa[k]); 57 } 58 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 59 if (past/future > best) { 60 best = past/future; 61 newcurrent = jj; 62 } 63 } 64 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 65 best = -1; 66 for (k=0; k<n; k++) { 67 if (done[k]) continue; 68 future = 0.0; 69 past = 0.0; 70 for (j=ai[k]; j<ai[k+1]; j++) { 71 kk = aj[j]; 72 if (done[kk]) past += PetscAbsScalar(aa[j]); 73 else if (kk != k) future += PetscAbsScalar(aa[j]); 74 } 75 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 76 if (past/future > best) { 77 best = past/future; 78 newcurrent = k; 79 } 80 } 81 } 82 if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current"); 83 current = newcurrent; 84 order[i+1] = current; 85 } 86 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr); 87 *icol = *irow; 88 ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr); 89 ierr = PetscFree(done);CHKERRQ(ierr); 90 ierr = PetscFree(order);CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 EXTERN_C_END 94 95 EXTERN_C_BEGIN 96 #undef __FUNCT__ 97 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 98 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool *flg) 99 { 100 PetscFunctionBegin; 101 *flg = PETSC_TRUE; 102 PetscFunctionReturn(0); 103 } 104 EXTERN_C_END 105 106 EXTERN_C_BEGIN 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 109 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 110 { 111 PetscInt n = A->rmap->n; 112 PetscErrorCode ierr; 113 114 PetscFunctionBegin; 115 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 116 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 117 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){ 118 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 119 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 120 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 121 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 122 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 123 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 124 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 125 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 126 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 127 (*B)->factortype = ftype; 128 PetscFunctionReturn(0); 129 } 130 EXTERN_C_END 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace" 134 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 135 { 136 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 137 IS isicol; 138 PetscErrorCode ierr; 139 const PetscInt *r,*ic; 140 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 141 PetscInt *bi,*bj,*ajtmp; 142 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 143 PetscReal f; 144 PetscInt nlnk,*lnk,k,**bi_ptr; 145 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 146 PetscBT lnkbt; 147 148 PetscFunctionBegin; 149 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 150 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 151 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 152 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 153 154 /* get new row pointers */ 155 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 156 bi[0] = 0; 157 158 /* bdiag is location of diagonal in factor */ 159 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 160 bdiag[0] = 0; 161 162 /* linked list for storing column indices of the active row */ 163 nlnk = n + 1; 164 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 165 166 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 167 168 /* initial FreeSpace size is f*(ai[n]+1) */ 169 f = info->fill; 170 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 171 current_space = free_space; 172 173 for (i=0; i<n; i++) { 174 /* copy previous fill into linked list */ 175 nzi = 0; 176 nnz = ai[r[i]+1] - ai[r[i]]; 177 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 178 ajtmp = aj + ai[r[i]]; 179 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 180 nzi += nlnk; 181 182 /* add pivot rows into linked list */ 183 row = lnk[n]; 184 while (row < i) { 185 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 186 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 187 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 188 nzi += nlnk; 189 row = lnk[row]; 190 } 191 bi[i+1] = bi[i] + nzi; 192 im[i] = nzi; 193 194 /* mark bdiag */ 195 nzbd = 0; 196 nnz = nzi; 197 k = lnk[n]; 198 while (nnz-- && k < i){ 199 nzbd++; 200 k = lnk[k]; 201 } 202 bdiag[i] = bi[i] + nzbd; 203 204 /* if free space is not available, make more free space */ 205 if (current_space->local_remaining<nzi) { 206 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 207 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 208 reallocs++; 209 } 210 211 /* copy data into free space, then initialize lnk */ 212 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 213 bi_ptr[i] = current_space->array; 214 current_space->array += nzi; 215 current_space->local_used += nzi; 216 current_space->local_remaining -= nzi; 217 } 218 #if defined(PETSC_USE_INFO) 219 if (ai[n] != 0) { 220 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 221 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 222 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 223 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 224 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 225 } else { 226 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 227 } 228 #endif 229 230 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 231 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 232 233 /* destroy list of free space and other temporary array(s) */ 234 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 235 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 236 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 237 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 238 239 /* put together the new matrix */ 240 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 241 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 242 b = (Mat_SeqAIJ*)(B)->data; 243 b->free_a = PETSC_TRUE; 244 b->free_ij = PETSC_TRUE; 245 b->singlemalloc = PETSC_FALSE; 246 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 247 b->j = bj; 248 b->i = bi; 249 b->diag = bdiag; 250 b->ilen = 0; 251 b->imax = 0; 252 b->row = isrow; 253 b->col = iscol; 254 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 255 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 256 b->icol = isicol; 257 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 258 259 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 260 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 261 b->maxnz = b->nz = bi[n] ; 262 263 (B)->factortype = MAT_FACTOR_LU; 264 (B)->info.factor_mallocs = reallocs; 265 (B)->info.fill_ratio_given = f; 266 267 if (ai[n]) { 268 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 269 } else { 270 (B)->info.fill_ratio_needed = 0.0; 271 } 272 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 273 if (a->inode.size) { 274 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 275 } 276 PetscFunctionReturn(0); 277 } 278 279 #undef __FUNCT__ 280 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 281 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 282 { 283 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 284 IS isicol; 285 PetscErrorCode ierr; 286 const PetscInt *r,*ic; 287 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 288 PetscInt *bi,*bj,*ajtmp; 289 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 290 PetscReal f; 291 PetscInt nlnk,*lnk,k,**bi_ptr; 292 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 293 PetscBT lnkbt; 294 PetscBool olddatastruct=PETSC_FALSE; 295 296 PetscFunctionBegin; 297 /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */ 298 ierr = PetscOptionsGetBool(PETSC_NULL,"-lu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr); 299 if(olddatastruct){ 300 ierr = MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);CHKERRQ(ierr); 301 PetscFunctionReturn(0); 302 } 303 304 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 305 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 306 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 307 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 308 309 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 310 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 311 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 312 bi[0] = bdiag[0] = 0; 313 314 /* linked list for storing column indices of the active row */ 315 nlnk = n + 1; 316 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 317 318 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 319 320 /* initial FreeSpace size is f*(ai[n]+1) */ 321 f = info->fill; 322 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 323 current_space = free_space; 324 325 for (i=0; i<n; i++) { 326 /* copy previous fill into linked list */ 327 nzi = 0; 328 nnz = ai[r[i]+1] - ai[r[i]]; 329 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 330 ajtmp = aj + ai[r[i]]; 331 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 332 nzi += nlnk; 333 334 /* add pivot rows into linked list */ 335 row = lnk[n]; 336 while (row < i){ 337 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 338 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 339 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 340 nzi += nlnk; 341 row = lnk[row]; 342 } 343 bi[i+1] = bi[i] + nzi; 344 im[i] = nzi; 345 346 /* mark bdiag */ 347 nzbd = 0; 348 nnz = nzi; 349 k = lnk[n]; 350 while (nnz-- && k < i){ 351 nzbd++; 352 k = lnk[k]; 353 } 354 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 355 356 /* if free space is not available, make more free space */ 357 if (current_space->local_remaining<nzi) { 358 nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */ 359 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 360 reallocs++; 361 } 362 363 /* copy data into free space, then initialize lnk */ 364 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 365 bi_ptr[i] = current_space->array; 366 current_space->array += nzi; 367 current_space->local_used += nzi; 368 current_space->local_remaining -= nzi; 369 } 370 371 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 372 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 373 374 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 375 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 376 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 377 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 378 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 379 380 /* put together the new matrix */ 381 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 382 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 383 b = (Mat_SeqAIJ*)(B)->data; 384 b->free_a = PETSC_TRUE; 385 b->free_ij = PETSC_TRUE; 386 b->singlemalloc = PETSC_FALSE; 387 ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 388 b->j = bj; 389 b->i = bi; 390 b->diag = bdiag; 391 b->ilen = 0; 392 b->imax = 0; 393 b->row = isrow; 394 b->col = iscol; 395 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 396 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 397 b->icol = isicol; 398 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 399 400 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 401 ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 402 b->maxnz = b->nz = bdiag[0]+1; 403 B->factortype = MAT_FACTOR_LU; 404 B->info.factor_mallocs = reallocs; 405 B->info.fill_ratio_given = f; 406 407 if (ai[n]) { 408 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 409 } else { 410 B->info.fill_ratio_needed = 0.0; 411 } 412 #if defined(PETSC_USE_INFO) 413 if (ai[n] != 0) { 414 PetscReal af = B->info.fill_ratio_needed; 415 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 416 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 417 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 418 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 419 } else { 420 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 421 } 422 #endif 423 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 424 if (a->inode.size) { 425 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 426 } 427 PetscFunctionReturn(0); 428 } 429 430 /* 431 Trouble in factorization, should we dump the original matrix? 432 */ 433 #undef __FUNCT__ 434 #define __FUNCT__ "MatFactorDumpMatrix" 435 PetscErrorCode MatFactorDumpMatrix(Mat A) 436 { 437 PetscErrorCode ierr; 438 PetscBool flg = PETSC_FALSE; 439 440 PetscFunctionBegin; 441 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr); 442 if (flg) { 443 PetscViewer viewer; 444 char filename[PETSC_MAX_PATH_LEN]; 445 446 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 447 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 448 ierr = MatView(A,viewer);CHKERRQ(ierr); 449 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 450 } 451 PetscFunctionReturn(0); 452 } 453 454 #undef __FUNCT__ 455 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 456 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 457 { 458 Mat C=B; 459 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 460 IS isrow = b->row,isicol = b->icol; 461 PetscErrorCode ierr; 462 const PetscInt *r,*ic,*ics; 463 const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag; 464 PetscInt i,j,k,nz,nzL,row,*pj; 465 const PetscInt *ajtmp,*bjtmp; 466 MatScalar *rtmp,*pc,multiplier,*pv; 467 const MatScalar *aa=a->a,*v; 468 PetscBool row_identity,col_identity; 469 FactorShiftCtx sctx; 470 const PetscInt *ddiag; 471 PetscReal rs; 472 MatScalar d; 473 474 PetscFunctionBegin; 475 /* MatPivotSetUp(): initialize shift context sctx */ 476 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 477 478 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 479 ddiag = a->diag; 480 sctx.shift_top = info->zeropivot; 481 for (i=0; i<n; i++) { 482 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 483 d = (aa)[ddiag[i]]; 484 rs = -PetscAbsScalar(d) - PetscRealPart(d); 485 v = aa+ai[i]; 486 nz = ai[i+1] - ai[i]; 487 for (j=0; j<nz; j++) 488 rs += PetscAbsScalar(v[j]); 489 if (rs>sctx.shift_top) sctx.shift_top = rs; 490 } 491 sctx.shift_top *= 1.1; 492 sctx.nshift_max = 5; 493 sctx.shift_lo = 0.; 494 sctx.shift_hi = 1.; 495 } 496 497 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 498 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 499 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 500 ics = ic; 501 502 do { 503 sctx.newshift = PETSC_FALSE; 504 for (i=0; i<n; i++){ 505 /* zero rtmp */ 506 /* L part */ 507 nz = bi[i+1] - bi[i]; 508 bjtmp = bj + bi[i]; 509 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 510 511 /* U part */ 512 nz = bdiag[i]-bdiag[i+1]; 513 bjtmp = bj + bdiag[i+1]+1; 514 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 515 516 /* load in initial (unfactored row) */ 517 nz = ai[r[i]+1] - ai[r[i]]; 518 ajtmp = aj + ai[r[i]]; 519 v = aa + ai[r[i]]; 520 for (j=0; j<nz; j++) { 521 rtmp[ics[ajtmp[j]]] = v[j]; 522 } 523 /* ZeropivotApply() */ 524 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 525 526 /* elimination */ 527 bjtmp = bj + bi[i]; 528 row = *bjtmp++; 529 nzL = bi[i+1] - bi[i]; 530 for(k=0; k < nzL;k++) { 531 pc = rtmp + row; 532 if (*pc != 0.0) { 533 pv = b->a + bdiag[row]; 534 multiplier = *pc * (*pv); 535 *pc = multiplier; 536 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 537 pv = b->a + bdiag[row+1]+1; 538 nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */ 539 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 540 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 541 } 542 row = *bjtmp++; 543 } 544 545 /* finished row so stick it into b->a */ 546 rs = 0.0; 547 /* L part */ 548 pv = b->a + bi[i] ; 549 pj = b->j + bi[i] ; 550 nz = bi[i+1] - bi[i]; 551 for (j=0; j<nz; j++) { 552 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 553 } 554 555 /* U part */ 556 pv = b->a + bdiag[i+1]+1; 557 pj = b->j + bdiag[i+1]+1; 558 nz = bdiag[i] - bdiag[i+1]-1; 559 for (j=0; j<nz; j++) { 560 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 561 } 562 563 sctx.rs = rs; 564 sctx.pv = rtmp[i]; 565 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 566 if(sctx.newshift) break; /* break for-loop */ 567 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 568 569 /* Mark diagonal and invert diagonal for simplier triangular solves */ 570 pv = b->a + bdiag[i]; 571 *pv = 1.0/rtmp[i]; 572 573 } /* endof for (i=0; i<n; i++){ */ 574 575 /* MatPivotRefine() */ 576 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max){ 577 /* 578 * if no shift in this attempt & shifting & started shifting & can refine, 579 * then try lower shift 580 */ 581 sctx.shift_hi = sctx.shift_fraction; 582 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 583 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 584 sctx.newshift = PETSC_TRUE; 585 sctx.nshift++; 586 } 587 } while (sctx.newshift); 588 589 ierr = PetscFree(rtmp);CHKERRQ(ierr); 590 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 591 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 592 593 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 594 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 595 if (row_identity && col_identity) { 596 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 597 } else { 598 C->ops->solve = MatSolve_SeqAIJ; 599 } 600 C->ops->solveadd = MatSolveAdd_SeqAIJ; 601 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 602 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 603 C->ops->matsolve = MatMatSolve_SeqAIJ; 604 C->assembled = PETSC_TRUE; 605 C->preallocated = PETSC_TRUE; 606 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 607 608 /* MatShiftView(A,info,&sctx) */ 609 if (sctx.nshift){ 610 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 611 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 612 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 613 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 614 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){ 615 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr); 616 } 617 } 618 ierr = Mat_CheckInode_FactorLU(C,PETSC_FALSE);CHKERRQ(ierr); 619 PetscFunctionReturn(0); 620 } 621 622 #undef __FUNCT__ 623 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace" 624 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 625 { 626 Mat C=B; 627 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 628 IS isrow = b->row,isicol = b->icol; 629 PetscErrorCode ierr; 630 const PetscInt *r,*ic,*ics; 631 PetscInt nz,row,i,j,n=A->rmap->n,diag; 632 const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 633 const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj; 634 MatScalar *pv,*rtmp,*pc,multiplier,d; 635 const MatScalar *v,*aa=a->a; 636 PetscReal rs=0.0; 637 FactorShiftCtx sctx; 638 const PetscInt *ddiag; 639 PetscBool row_identity, col_identity; 640 641 PetscFunctionBegin; 642 /* MatPivotSetUp(): initialize shift context sctx */ 643 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 644 645 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 646 ddiag = a->diag; 647 sctx.shift_top = info->zeropivot; 648 for (i=0; i<n; i++) { 649 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 650 d = (aa)[ddiag[i]]; 651 rs = -PetscAbsScalar(d) - PetscRealPart(d); 652 v = aa+ai[i]; 653 nz = ai[i+1] - ai[i]; 654 for (j=0; j<nz; j++) 655 rs += PetscAbsScalar(v[j]); 656 if (rs>sctx.shift_top) sctx.shift_top = rs; 657 } 658 sctx.shift_top *= 1.1; 659 sctx.nshift_max = 5; 660 sctx.shift_lo = 0.; 661 sctx.shift_hi = 1.; 662 } 663 664 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 665 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 666 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 667 ics = ic; 668 669 do { 670 sctx.newshift = PETSC_FALSE; 671 for (i=0; i<n; i++){ 672 nz = bi[i+1] - bi[i]; 673 bjtmp = bj + bi[i]; 674 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 675 676 /* load in initial (unfactored row) */ 677 nz = ai[r[i]+1] - ai[r[i]]; 678 ajtmp = aj + ai[r[i]]; 679 v = aa + ai[r[i]]; 680 for (j=0; j<nz; j++) { 681 rtmp[ics[ajtmp[j]]] = v[j]; 682 } 683 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 684 685 row = *bjtmp++; 686 while (row < i) { 687 pc = rtmp + row; 688 if (*pc != 0.0) { 689 pv = b->a + diag_offset[row]; 690 pj = b->j + diag_offset[row] + 1; 691 multiplier = *pc / *pv++; 692 *pc = multiplier; 693 nz = bi[row+1] - diag_offset[row] - 1; 694 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 695 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 696 } 697 row = *bjtmp++; 698 } 699 /* finished row so stick it into b->a */ 700 pv = b->a + bi[i] ; 701 pj = b->j + bi[i] ; 702 nz = bi[i+1] - bi[i]; 703 diag = diag_offset[i] - bi[i]; 704 rs = 0.0; 705 for (j=0; j<nz; j++) { 706 pv[j] = rtmp[pj[j]]; 707 rs += PetscAbsScalar(pv[j]); 708 } 709 rs -= PetscAbsScalar(pv[diag]); 710 711 sctx.rs = rs; 712 sctx.pv = pv[diag]; 713 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 714 if (sctx.newshift) break; 715 pv[diag] = sctx.pv; 716 } 717 718 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 719 /* 720 * if no shift in this attempt & shifting & started shifting & can refine, 721 * then try lower shift 722 */ 723 sctx.shift_hi = sctx.shift_fraction; 724 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 725 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 726 sctx.newshift = PETSC_TRUE; 727 sctx.nshift++; 728 } 729 } while (sctx.newshift); 730 731 /* invert diagonal entries for simplier triangular solves */ 732 for (i=0; i<n; i++) { 733 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 734 } 735 ierr = PetscFree(rtmp);CHKERRQ(ierr); 736 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 737 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 738 739 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 740 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 741 if (row_identity && col_identity) { 742 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 743 } else { 744 C->ops->solve = MatSolve_SeqAIJ_inplace; 745 } 746 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 747 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 748 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 749 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 750 C->assembled = PETSC_TRUE; 751 C->preallocated = PETSC_TRUE; 752 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 753 if (sctx.nshift){ 754 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 755 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 756 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 757 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 758 } 759 } 760 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 761 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 762 ierr = Mat_CheckInode(C,PETSC_FALSE);CHKERRQ(ierr); 763 PetscFunctionReturn(0); 764 } 765 766 /* 767 This routine implements inplace ILU(0) with row or/and column permutations. 768 Input: 769 A - original matrix 770 Output; 771 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 772 a->j (col index) is permuted by the inverse of colperm, then sorted 773 a->a reordered accordingly with a->j 774 a->diag (ptr to diagonal elements) is updated. 775 */ 776 #undef __FUNCT__ 777 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 778 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 779 { 780 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 781 IS isrow = a->row,isicol = a->icol; 782 PetscErrorCode ierr; 783 const PetscInt *r,*ic,*ics; 784 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 785 PetscInt *ajtmp,nz,row; 786 PetscInt *diag = a->diag,nbdiag,*pj; 787 PetscScalar *rtmp,*pc,multiplier,d; 788 MatScalar *pv,*v; 789 PetscReal rs; 790 FactorShiftCtx sctx; 791 const MatScalar *aa=a->a,*vtmp; 792 793 PetscFunctionBegin; 794 if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 795 796 /* MatPivotSetUp(): initialize shift context sctx */ 797 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 798 799 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 800 const PetscInt *ddiag = a->diag; 801 sctx.shift_top = info->zeropivot; 802 for (i=0; i<n; i++) { 803 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 804 d = (aa)[ddiag[i]]; 805 rs = -PetscAbsScalar(d) - PetscRealPart(d); 806 vtmp = aa+ai[i]; 807 nz = ai[i+1] - ai[i]; 808 for (j=0; j<nz; j++) 809 rs += PetscAbsScalar(vtmp[j]); 810 if (rs>sctx.shift_top) sctx.shift_top = rs; 811 } 812 sctx.shift_top *= 1.1; 813 sctx.nshift_max = 5; 814 sctx.shift_lo = 0.; 815 sctx.shift_hi = 1.; 816 } 817 818 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 819 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 820 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 821 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 822 ics = ic; 823 824 #if defined(MV) 825 sctx.shift_top = 0.; 826 sctx.nshift_max = 0; 827 sctx.shift_lo = 0.; 828 sctx.shift_hi = 0.; 829 sctx.shift_fraction = 0.; 830 831 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 832 sctx.shift_top = 0.; 833 for (i=0; i<n; i++) { 834 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 835 d = (a->a)[diag[i]]; 836 rs = -PetscAbsScalar(d) - PetscRealPart(d); 837 v = a->a+ai[i]; 838 nz = ai[i+1] - ai[i]; 839 for (j=0; j<nz; j++) 840 rs += PetscAbsScalar(v[j]); 841 if (rs>sctx.shift_top) sctx.shift_top = rs; 842 } 843 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 844 sctx.shift_top *= 1.1; 845 sctx.nshift_max = 5; 846 sctx.shift_lo = 0.; 847 sctx.shift_hi = 1.; 848 } 849 850 sctx.shift_amount = 0.; 851 sctx.nshift = 0; 852 #endif 853 854 do { 855 sctx.newshift = PETSC_FALSE; 856 for (i=0; i<n; i++){ 857 /* load in initial unfactored row */ 858 nz = ai[r[i]+1] - ai[r[i]]; 859 ajtmp = aj + ai[r[i]]; 860 v = a->a + ai[r[i]]; 861 /* sort permuted ajtmp and values v accordingly */ 862 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 863 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 864 865 diag[r[i]] = ai[r[i]]; 866 for (j=0; j<nz; j++) { 867 rtmp[ajtmp[j]] = v[j]; 868 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 869 } 870 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 871 872 row = *ajtmp++; 873 while (row < i) { 874 pc = rtmp + row; 875 if (*pc != 0.0) { 876 pv = a->a + diag[r[row]]; 877 pj = aj + diag[r[row]] + 1; 878 879 multiplier = *pc / *pv++; 880 *pc = multiplier; 881 nz = ai[r[row]+1] - diag[r[row]] - 1; 882 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 883 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 884 } 885 row = *ajtmp++; 886 } 887 /* finished row so overwrite it onto a->a */ 888 pv = a->a + ai[r[i]] ; 889 pj = aj + ai[r[i]] ; 890 nz = ai[r[i]+1] - ai[r[i]]; 891 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 892 893 rs = 0.0; 894 for (j=0; j<nz; j++) { 895 pv[j] = rtmp[pj[j]]; 896 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 897 } 898 899 sctx.rs = rs; 900 sctx.pv = pv[nbdiag]; 901 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 902 if (sctx.newshift) break; 903 pv[nbdiag] = sctx.pv; 904 } 905 906 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 907 /* 908 * if no shift in this attempt & shifting & started shifting & can refine, 909 * then try lower shift 910 */ 911 sctx.shift_hi = sctx.shift_fraction; 912 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 913 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 914 sctx.newshift = PETSC_TRUE; 915 sctx.nshift++; 916 } 917 } while (sctx.newshift); 918 919 /* invert diagonal entries for simplier triangular solves */ 920 for (i=0; i<n; i++) { 921 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 922 } 923 924 ierr = PetscFree(rtmp);CHKERRQ(ierr); 925 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 926 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 927 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 928 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 929 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 930 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 931 A->assembled = PETSC_TRUE; 932 A->preallocated = PETSC_TRUE; 933 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 934 if (sctx.nshift){ 935 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 936 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 937 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 938 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 939 } 940 } 941 PetscFunctionReturn(0); 942 } 943 944 /* ----------------------------------------------------------- */ 945 #undef __FUNCT__ 946 #define __FUNCT__ "MatLUFactor_SeqAIJ" 947 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 948 { 949 PetscErrorCode ierr; 950 Mat C; 951 952 PetscFunctionBegin; 953 ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 954 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 955 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 956 A->ops->solve = C->ops->solve; 957 A->ops->solvetranspose = C->ops->solvetranspose; 958 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 959 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 960 PetscFunctionReturn(0); 961 } 962 /* ----------------------------------------------------------- */ 963 964 965 #undef __FUNCT__ 966 #define __FUNCT__ "MatSolve_SeqAIJ_inplace" 967 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 968 { 969 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 970 IS iscol = a->col,isrow = a->row; 971 PetscErrorCode ierr; 972 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 973 PetscInt nz; 974 const PetscInt *rout,*cout,*r,*c; 975 PetscScalar *x,*tmp,*tmps,sum; 976 const PetscScalar *b; 977 const MatScalar *aa = a->a,*v; 978 979 PetscFunctionBegin; 980 if (!n) PetscFunctionReturn(0); 981 982 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 983 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 984 tmp = a->solve_work; 985 986 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 987 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 988 989 /* forward solve the lower triangular */ 990 tmp[0] = b[*r++]; 991 tmps = tmp; 992 for (i=1; i<n; i++) { 993 v = aa + ai[i] ; 994 vi = aj + ai[i] ; 995 nz = a->diag[i] - ai[i]; 996 sum = b[*r++]; 997 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 998 tmp[i] = sum; 999 } 1000 1001 /* backward solve the upper triangular */ 1002 for (i=n-1; i>=0; i--){ 1003 v = aa + a->diag[i] + 1; 1004 vi = aj + a->diag[i] + 1; 1005 nz = ai[i+1] - a->diag[i] - 1; 1006 sum = tmp[i]; 1007 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1008 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 1009 } 1010 1011 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1012 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1013 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1014 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1015 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1016 PetscFunctionReturn(0); 1017 } 1018 1019 #undef __FUNCT__ 1020 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace" 1021 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X) 1022 { 1023 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1024 IS iscol = a->col,isrow = a->row; 1025 PetscErrorCode ierr; 1026 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1027 PetscInt nz,neq; 1028 const PetscInt *rout,*cout,*r,*c; 1029 PetscScalar *x,*b,*tmp,*tmps,sum; 1030 const MatScalar *aa = a->a,*v; 1031 PetscBool bisdense,xisdense; 1032 1033 PetscFunctionBegin; 1034 if (!n) PetscFunctionReturn(0); 1035 1036 ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1037 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1038 ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1039 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1040 1041 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 1042 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 1043 1044 tmp = a->solve_work; 1045 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1046 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1047 1048 for (neq=0; neq<B->cmap->n; neq++){ 1049 /* forward solve the lower triangular */ 1050 tmp[0] = b[r[0]]; 1051 tmps = tmp; 1052 for (i=1; i<n; i++) { 1053 v = aa + ai[i] ; 1054 vi = aj + ai[i] ; 1055 nz = a->diag[i] - ai[i]; 1056 sum = b[r[i]]; 1057 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1058 tmp[i] = sum; 1059 } 1060 /* backward solve the upper triangular */ 1061 for (i=n-1; i>=0; i--){ 1062 v = aa + a->diag[i] + 1; 1063 vi = aj + a->diag[i] + 1; 1064 nz = ai[i+1] - a->diag[i] - 1; 1065 sum = tmp[i]; 1066 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1067 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 1068 } 1069 1070 b += n; 1071 x += n; 1072 } 1073 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1074 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1075 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1076 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 1077 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1078 PetscFunctionReturn(0); 1079 } 1080 1081 #undef __FUNCT__ 1082 #define __FUNCT__ "MatMatSolve_SeqAIJ" 1083 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 1084 { 1085 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1086 IS iscol = a->col,isrow = a->row; 1087 PetscErrorCode ierr; 1088 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1089 PetscInt nz,neq; 1090 const PetscInt *rout,*cout,*r,*c; 1091 PetscScalar *x,*b,*tmp,sum; 1092 const MatScalar *aa = a->a,*v; 1093 PetscBool bisdense,xisdense; 1094 1095 PetscFunctionBegin; 1096 if (!n) PetscFunctionReturn(0); 1097 1098 ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1099 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1100 ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1101 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1102 1103 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 1104 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 1105 1106 tmp = a->solve_work; 1107 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1108 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1109 1110 for (neq=0; neq<B->cmap->n; neq++){ 1111 /* forward solve the lower triangular */ 1112 tmp[0] = b[r[0]]; 1113 v = aa; 1114 vi = aj; 1115 for (i=1; i<n; i++) { 1116 nz = ai[i+1] - ai[i]; 1117 sum = b[r[i]]; 1118 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1119 tmp[i] = sum; 1120 v += nz; vi += nz; 1121 } 1122 1123 /* backward solve the upper triangular */ 1124 for (i=n-1; i>=0; i--){ 1125 v = aa + adiag[i+1]+1; 1126 vi = aj + adiag[i+1]+1; 1127 nz = adiag[i]-adiag[i+1]-1; 1128 sum = tmp[i]; 1129 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1130 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 1131 } 1132 1133 b += n; 1134 x += n; 1135 } 1136 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1137 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1138 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1139 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 1140 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1141 PetscFunctionReturn(0); 1142 } 1143 1144 #undef __FUNCT__ 1145 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 1146 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 1147 { 1148 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1149 IS iscol = a->col,isrow = a->row; 1150 PetscErrorCode ierr; 1151 const PetscInt *r,*c,*rout,*cout; 1152 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1153 PetscInt nz,row; 1154 PetscScalar *x,*b,*tmp,*tmps,sum; 1155 const MatScalar *aa = a->a,*v; 1156 1157 PetscFunctionBegin; 1158 if (!n) PetscFunctionReturn(0); 1159 1160 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1161 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1162 tmp = a->solve_work; 1163 1164 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1165 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1166 1167 /* forward solve the lower triangular */ 1168 tmp[0] = b[*r++]; 1169 tmps = tmp; 1170 for (row=1; row<n; row++) { 1171 i = rout[row]; /* permuted row */ 1172 v = aa + ai[i] ; 1173 vi = aj + ai[i] ; 1174 nz = a->diag[i] - ai[i]; 1175 sum = b[*r++]; 1176 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1177 tmp[row] = sum; 1178 } 1179 1180 /* backward solve the upper triangular */ 1181 for (row=n-1; row>=0; row--){ 1182 i = rout[row]; /* permuted row */ 1183 v = aa + a->diag[i] + 1; 1184 vi = aj + a->diag[i] + 1; 1185 nz = ai[i+1] - a->diag[i] - 1; 1186 sum = tmp[row]; 1187 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1188 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 1189 } 1190 1191 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1192 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1193 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1194 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1195 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1196 PetscFunctionReturn(0); 1197 } 1198 1199 /* ----------------------------------------------------------- */ 1200 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1201 #undef __FUNCT__ 1202 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace" 1203 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1204 { 1205 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1206 PetscErrorCode ierr; 1207 PetscInt n = A->rmap->n; 1208 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag; 1209 PetscScalar *x; 1210 const PetscScalar *b; 1211 const MatScalar *aa = a->a; 1212 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1213 PetscInt adiag_i,i,nz,ai_i; 1214 const PetscInt *vi; 1215 const MatScalar *v; 1216 PetscScalar sum; 1217 #endif 1218 1219 PetscFunctionBegin; 1220 if (!n) PetscFunctionReturn(0); 1221 1222 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1223 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1224 1225 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1226 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 1227 #else 1228 /* forward solve the lower triangular */ 1229 x[0] = b[0]; 1230 for (i=1; i<n; i++) { 1231 ai_i = ai[i]; 1232 v = aa + ai_i; 1233 vi = aj + ai_i; 1234 nz = adiag[i] - ai_i; 1235 sum = b[i]; 1236 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1237 x[i] = sum; 1238 } 1239 1240 /* backward solve the upper triangular */ 1241 for (i=n-1; i>=0; i--){ 1242 adiag_i = adiag[i]; 1243 v = aa + adiag_i + 1; 1244 vi = aj + adiag_i + 1; 1245 nz = ai[i+1] - adiag_i - 1; 1246 sum = x[i]; 1247 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1248 x[i] = sum*aa[adiag_i]; 1249 } 1250 #endif 1251 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1252 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1253 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1254 PetscFunctionReturn(0); 1255 } 1256 1257 #undef __FUNCT__ 1258 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace" 1259 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx) 1260 { 1261 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1262 IS iscol = a->col,isrow = a->row; 1263 PetscErrorCode ierr; 1264 PetscInt i, n = A->rmap->n,j; 1265 PetscInt nz; 1266 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j; 1267 PetscScalar *x,*tmp,sum; 1268 const PetscScalar *b; 1269 const MatScalar *aa = a->a,*v; 1270 1271 PetscFunctionBegin; 1272 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1273 1274 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1275 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1276 tmp = a->solve_work; 1277 1278 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1279 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1280 1281 /* forward solve the lower triangular */ 1282 tmp[0] = b[*r++]; 1283 for (i=1; i<n; i++) { 1284 v = aa + ai[i] ; 1285 vi = aj + ai[i] ; 1286 nz = a->diag[i] - ai[i]; 1287 sum = b[*r++]; 1288 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1289 tmp[i] = sum; 1290 } 1291 1292 /* backward solve the upper triangular */ 1293 for (i=n-1; i>=0; i--){ 1294 v = aa + a->diag[i] + 1; 1295 vi = aj + a->diag[i] + 1; 1296 nz = ai[i+1] - a->diag[i] - 1; 1297 sum = tmp[i]; 1298 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1299 tmp[i] = sum*aa[a->diag[i]]; 1300 x[*c--] += tmp[i]; 1301 } 1302 1303 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1304 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1305 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1306 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1307 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1308 1309 PetscFunctionReturn(0); 1310 } 1311 1312 #undef __FUNCT__ 1313 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1314 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1315 { 1316 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1317 IS iscol = a->col,isrow = a->row; 1318 PetscErrorCode ierr; 1319 PetscInt i, n = A->rmap->n,j; 1320 PetscInt nz; 1321 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1322 PetscScalar *x,*tmp,sum; 1323 const PetscScalar *b; 1324 const MatScalar *aa = a->a,*v; 1325 1326 PetscFunctionBegin; 1327 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1328 1329 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1330 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1331 tmp = a->solve_work; 1332 1333 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1334 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1335 1336 /* forward solve the lower triangular */ 1337 tmp[0] = b[r[0]]; 1338 v = aa; 1339 vi = aj; 1340 for (i=1; i<n; i++) { 1341 nz = ai[i+1] - ai[i]; 1342 sum = b[r[i]]; 1343 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1344 tmp[i] = sum; 1345 v += nz; vi += nz; 1346 } 1347 1348 /* backward solve the upper triangular */ 1349 v = aa + adiag[n-1]; 1350 vi = aj + adiag[n-1]; 1351 for (i=n-1; i>=0; i--){ 1352 nz = adiag[i] - adiag[i+1] - 1; 1353 sum = tmp[i]; 1354 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1355 tmp[i] = sum*v[nz]; 1356 x[c[i]] += tmp[i]; 1357 v += nz+1; vi += nz+1; 1358 } 1359 1360 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1361 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1362 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1363 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1364 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1365 1366 PetscFunctionReturn(0); 1367 } 1368 1369 #undef __FUNCT__ 1370 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace" 1371 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 1372 { 1373 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1374 IS iscol = a->col,isrow = a->row; 1375 PetscErrorCode ierr; 1376 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1377 PetscInt i,n = A->rmap->n,j; 1378 PetscInt nz; 1379 PetscScalar *x,*tmp,s1; 1380 const MatScalar *aa = a->a,*v; 1381 const PetscScalar *b; 1382 1383 PetscFunctionBegin; 1384 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1385 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1386 tmp = a->solve_work; 1387 1388 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1389 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1390 1391 /* copy the b into temp work space according to permutation */ 1392 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1393 1394 /* forward solve the U^T */ 1395 for (i=0; i<n; i++) { 1396 v = aa + diag[i] ; 1397 vi = aj + diag[i] + 1; 1398 nz = ai[i+1] - diag[i] - 1; 1399 s1 = tmp[i]; 1400 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1401 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1402 tmp[i] = s1; 1403 } 1404 1405 /* backward solve the L^T */ 1406 for (i=n-1; i>=0; i--){ 1407 v = aa + diag[i] - 1 ; 1408 vi = aj + diag[i] - 1 ; 1409 nz = diag[i] - ai[i]; 1410 s1 = tmp[i]; 1411 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1412 } 1413 1414 /* copy tmp into x according to permutation */ 1415 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1416 1417 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1418 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1419 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1420 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1421 1422 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1423 PetscFunctionReturn(0); 1424 } 1425 1426 #undef __FUNCT__ 1427 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1428 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1429 { 1430 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1431 IS iscol = a->col,isrow = a->row; 1432 PetscErrorCode ierr; 1433 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1434 PetscInt i,n = A->rmap->n,j; 1435 PetscInt nz; 1436 PetscScalar *x,*tmp,s1; 1437 const MatScalar *aa = a->a,*v; 1438 const PetscScalar *b; 1439 1440 PetscFunctionBegin; 1441 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1442 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1443 tmp = a->solve_work; 1444 1445 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1446 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1447 1448 /* copy the b into temp work space according to permutation */ 1449 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1450 1451 /* forward solve the U^T */ 1452 for (i=0; i<n; i++) { 1453 v = aa + adiag[i+1] + 1; 1454 vi = aj + adiag[i+1] + 1; 1455 nz = adiag[i] - adiag[i+1] - 1; 1456 s1 = tmp[i]; 1457 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1458 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1459 tmp[i] = s1; 1460 } 1461 1462 /* backward solve the L^T */ 1463 for (i=n-1; i>=0; i--){ 1464 v = aa + ai[i]; 1465 vi = aj + ai[i]; 1466 nz = ai[i+1] - ai[i]; 1467 s1 = tmp[i]; 1468 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1469 } 1470 1471 /* copy tmp into x according to permutation */ 1472 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1473 1474 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1475 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1476 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1477 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1478 1479 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1480 PetscFunctionReturn(0); 1481 } 1482 1483 #undef __FUNCT__ 1484 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace" 1485 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx) 1486 { 1487 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1488 IS iscol = a->col,isrow = a->row; 1489 PetscErrorCode ierr; 1490 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1491 PetscInt i,n = A->rmap->n,j; 1492 PetscInt nz; 1493 PetscScalar *x,*tmp,s1; 1494 const MatScalar *aa = a->a,*v; 1495 const PetscScalar *b; 1496 1497 PetscFunctionBegin; 1498 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1499 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1500 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1501 tmp = a->solve_work; 1502 1503 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1504 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1505 1506 /* copy the b into temp work space according to permutation */ 1507 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1508 1509 /* forward solve the U^T */ 1510 for (i=0; i<n; i++) { 1511 v = aa + diag[i] ; 1512 vi = aj + diag[i] + 1; 1513 nz = ai[i+1] - diag[i] - 1; 1514 s1 = tmp[i]; 1515 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1516 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1517 tmp[i] = s1; 1518 } 1519 1520 /* backward solve the L^T */ 1521 for (i=n-1; i>=0; i--){ 1522 v = aa + diag[i] - 1 ; 1523 vi = aj + diag[i] - 1 ; 1524 nz = diag[i] - ai[i]; 1525 s1 = tmp[i]; 1526 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1527 } 1528 1529 /* copy tmp into x according to permutation */ 1530 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1531 1532 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1533 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1534 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1535 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1536 1537 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1538 PetscFunctionReturn(0); 1539 } 1540 1541 #undef __FUNCT__ 1542 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1543 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1544 { 1545 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1546 IS iscol = a->col,isrow = a->row; 1547 PetscErrorCode ierr; 1548 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1549 PetscInt i,n = A->rmap->n,j; 1550 PetscInt nz; 1551 PetscScalar *x,*tmp,s1; 1552 const MatScalar *aa = a->a,*v; 1553 const PetscScalar *b; 1554 1555 PetscFunctionBegin; 1556 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1557 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1558 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1559 tmp = a->solve_work; 1560 1561 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1562 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1563 1564 /* copy the b into temp work space according to permutation */ 1565 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1566 1567 /* forward solve the U^T */ 1568 for (i=0; i<n; i++) { 1569 v = aa + adiag[i+1] + 1; 1570 vi = aj + adiag[i+1] + 1; 1571 nz = adiag[i] - adiag[i+1] - 1; 1572 s1 = tmp[i]; 1573 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1574 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1575 tmp[i] = s1; 1576 } 1577 1578 1579 /* backward solve the L^T */ 1580 for (i=n-1; i>=0; i--){ 1581 v = aa + ai[i] ; 1582 vi = aj + ai[i]; 1583 nz = ai[i+1] - ai[i]; 1584 s1 = tmp[i]; 1585 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1586 } 1587 1588 /* copy tmp into x according to permutation */ 1589 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1590 1591 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1592 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1593 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1594 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1595 1596 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1597 PetscFunctionReturn(0); 1598 } 1599 1600 /* ----------------------------------------------------------------*/ 1601 1602 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool ); 1603 1604 /* 1605 ilu() under revised new data structure. 1606 Factored arrays bj and ba are stored as 1607 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1608 1609 bi=fact->i is an array of size n+1, in which 1610 bi+ 1611 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1612 bi[n]: points to L(n-1,n-1)+1 1613 1614 bdiag=fact->diag is an array of size n+1,in which 1615 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1616 bdiag[n]: points to entry of U(n-1,0)-1 1617 1618 U(i,:) contains bdiag[i] as its last entry, i.e., 1619 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1620 */ 1621 #undef __FUNCT__ 1622 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0" 1623 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1624 { 1625 1626 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1627 PetscErrorCode ierr; 1628 const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1629 PetscInt i,j,k=0,nz,*bi,*bj,*bdiag; 1630 PetscBool missing; 1631 IS isicol; 1632 1633 PetscFunctionBegin; 1634 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1635 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1636 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1637 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1638 1639 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 1640 b = (Mat_SeqAIJ*)(fact)->data; 1641 1642 /* allocate matrix arrays for new data structure */ 1643 ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);CHKERRQ(ierr); 1644 ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1645 b->singlemalloc = PETSC_TRUE; 1646 if (!b->diag){ 1647 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr); 1648 ierr = PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1649 } 1650 bdiag = b->diag; 1651 1652 if (n > 0) { 1653 ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr); 1654 } 1655 1656 /* set bi and bj with new data structure */ 1657 bi = b->i; 1658 bj = b->j; 1659 1660 /* L part */ 1661 bi[0] = 0; 1662 for (i=0; i<n; i++){ 1663 nz = adiag[i] - ai[i]; 1664 bi[i+1] = bi[i] + nz; 1665 aj = a->j + ai[i]; 1666 for (j=0; j<nz; j++){ 1667 /* *bj = aj[j]; bj++; */ 1668 bj[k++] = aj[j]; 1669 } 1670 } 1671 1672 /* U part */ 1673 bdiag[n] = bi[n]-1; 1674 for (i=n-1; i>=0; i--){ 1675 nz = ai[i+1] - adiag[i] - 1; 1676 aj = a->j + adiag[i] + 1; 1677 for (j=0; j<nz; j++){ 1678 /* *bj = aj[j]; bj++; */ 1679 bj[k++] = aj[j]; 1680 } 1681 /* diag[i] */ 1682 /* *bj = i; bj++; */ 1683 bj[k++] = i; 1684 bdiag[i] = bdiag[i+1] + nz + 1; 1685 } 1686 1687 fact->factortype = MAT_FACTOR_ILU; 1688 fact->info.factor_mallocs = 0; 1689 fact->info.fill_ratio_given = info->fill; 1690 fact->info.fill_ratio_needed = 1.0; 1691 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1692 1693 b = (Mat_SeqAIJ*)(fact)->data; 1694 b->row = isrow; 1695 b->col = iscol; 1696 b->icol = isicol; 1697 ierr = PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1698 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1699 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1700 PetscFunctionReturn(0); 1701 } 1702 1703 #undef __FUNCT__ 1704 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1705 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1706 { 1707 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1708 IS isicol; 1709 PetscErrorCode ierr; 1710 const PetscInt *r,*ic; 1711 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1712 PetscInt *bi,*cols,nnz,*cols_lvl; 1713 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1714 PetscInt i,levels,diagonal_fill; 1715 PetscBool col_identity,row_identity; 1716 PetscReal f; 1717 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1718 PetscBT lnkbt; 1719 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1720 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1721 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1722 1723 PetscFunctionBegin; 1724 /* Uncomment the old data struct part only while testing new data structure for MatSolve() */ 1725 /* 1726 PetscBool olddatastruct=PETSC_FALSE; 1727 ierr = PetscOptionsGetBool(PETSC_NULL,"-ilu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr); 1728 if(olddatastruct){ 1729 ierr = MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1730 PetscFunctionReturn(0); 1731 } 1732 */ 1733 1734 levels = (PetscInt)info->levels; 1735 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1736 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1737 1738 if (!levels && row_identity && col_identity) { 1739 /* special case: ilu(0) with natural ordering */ 1740 ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1741 if (a->inode.size) { 1742 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1743 } 1744 PetscFunctionReturn(0); 1745 } 1746 1747 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1748 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1749 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1750 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1751 1752 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1753 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1754 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1755 bi[0] = bdiag[0] = 0; 1756 1757 ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr); 1758 1759 /* create a linked list for storing column indices of the active row */ 1760 nlnk = n + 1; 1761 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1762 1763 /* initial FreeSpace size is f*(ai[n]+1) */ 1764 f = info->fill; 1765 diagonal_fill = (PetscInt)info->diagonal_fill; 1766 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1767 current_space = free_space; 1768 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1769 current_space_lvl = free_space_lvl; 1770 1771 for (i=0; i<n; i++) { 1772 nzi = 0; 1773 /* copy current row into linked list */ 1774 nnz = ai[r[i]+1] - ai[r[i]]; 1775 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1776 cols = aj + ai[r[i]]; 1777 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1778 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1779 nzi += nlnk; 1780 1781 /* make sure diagonal entry is included */ 1782 if (diagonal_fill && lnk[i] == -1) { 1783 fm = n; 1784 while (lnk[fm] < i) fm = lnk[fm]; 1785 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1786 lnk[fm] = i; 1787 lnk_lvl[i] = 0; 1788 nzi++; dcount++; 1789 } 1790 1791 /* add pivot rows into the active row */ 1792 nzbd = 0; 1793 prow = lnk[n]; 1794 while (prow < i) { 1795 nnz = bdiag[prow]; 1796 cols = bj_ptr[prow] + nnz + 1; 1797 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1798 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1799 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1800 nzi += nlnk; 1801 prow = lnk[prow]; 1802 nzbd++; 1803 } 1804 bdiag[i] = nzbd; 1805 bi[i+1] = bi[i] + nzi; 1806 1807 /* if free space is not available, make more free space */ 1808 if (current_space->local_remaining<nzi) { 1809 nnz = 2*nzi*(n - i); /* estimated and max additional space needed */ 1810 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1811 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1812 reallocs++; 1813 } 1814 1815 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1816 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1817 bj_ptr[i] = current_space->array; 1818 bjlvl_ptr[i] = current_space_lvl->array; 1819 1820 /* make sure the active row i has diagonal entry */ 1821 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1822 1823 current_space->array += nzi; 1824 current_space->local_used += nzi; 1825 current_space->local_remaining -= nzi; 1826 current_space_lvl->array += nzi; 1827 current_space_lvl->local_used += nzi; 1828 current_space_lvl->local_remaining -= nzi; 1829 } 1830 1831 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1832 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1833 1834 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1835 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1836 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 1837 1838 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1839 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1840 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 1841 1842 #if defined(PETSC_USE_INFO) 1843 { 1844 PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1845 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1846 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1847 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1848 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1849 if (diagonal_fill) { 1850 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1851 } 1852 } 1853 #endif 1854 1855 /* put together the new matrix */ 1856 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1857 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 1858 b = (Mat_SeqAIJ*)(fact)->data; 1859 b->free_a = PETSC_TRUE; 1860 b->free_ij = PETSC_TRUE; 1861 b->singlemalloc = PETSC_FALSE; 1862 ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 1863 b->j = bj; 1864 b->i = bi; 1865 b->diag = bdiag; 1866 b->ilen = 0; 1867 b->imax = 0; 1868 b->row = isrow; 1869 b->col = iscol; 1870 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1871 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1872 b->icol = isicol; 1873 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1874 /* In b structure: Free imax, ilen, old a, old j. 1875 Allocate bdiag, solve_work, new a, new j */ 1876 ierr = PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1877 b->maxnz = b->nz = bdiag[0]+1; 1878 (fact)->info.factor_mallocs = reallocs; 1879 (fact)->info.fill_ratio_given = f; 1880 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1881 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1882 if (a->inode.size) { 1883 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1884 } 1885 PetscFunctionReturn(0); 1886 } 1887 1888 #undef __FUNCT__ 1889 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace" 1890 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1891 { 1892 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1893 IS isicol; 1894 PetscErrorCode ierr; 1895 const PetscInt *r,*ic; 1896 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1897 PetscInt *bi,*cols,nnz,*cols_lvl; 1898 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1899 PetscInt i,levels,diagonal_fill; 1900 PetscBool col_identity,row_identity; 1901 PetscReal f; 1902 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1903 PetscBT lnkbt; 1904 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1905 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1906 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1907 PetscBool missing; 1908 1909 PetscFunctionBegin; 1910 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1911 f = info->fill; 1912 levels = (PetscInt)info->levels; 1913 diagonal_fill = (PetscInt)info->diagonal_fill; 1914 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1915 1916 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1917 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1918 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1919 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 1920 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1921 if (a->inode.size) { 1922 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1923 } 1924 fact->factortype = MAT_FACTOR_ILU; 1925 (fact)->info.factor_mallocs = 0; 1926 (fact)->info.fill_ratio_given = info->fill; 1927 (fact)->info.fill_ratio_needed = 1.0; 1928 b = (Mat_SeqAIJ*)(fact)->data; 1929 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1930 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1931 b->row = isrow; 1932 b->col = iscol; 1933 b->icol = isicol; 1934 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1935 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1936 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1937 PetscFunctionReturn(0); 1938 } 1939 1940 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1941 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1942 1943 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1944 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1945 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1946 bi[0] = bdiag[0] = 0; 1947 1948 ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr); 1949 1950 /* create a linked list for storing column indices of the active row */ 1951 nlnk = n + 1; 1952 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1953 1954 /* initial FreeSpace size is f*(ai[n]+1) */ 1955 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1956 current_space = free_space; 1957 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1958 current_space_lvl = free_space_lvl; 1959 1960 for (i=0; i<n; i++) { 1961 nzi = 0; 1962 /* copy current row into linked list */ 1963 nnz = ai[r[i]+1] - ai[r[i]]; 1964 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1965 cols = aj + ai[r[i]]; 1966 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1967 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1968 nzi += nlnk; 1969 1970 /* make sure diagonal entry is included */ 1971 if (diagonal_fill && lnk[i] == -1) { 1972 fm = n; 1973 while (lnk[fm] < i) fm = lnk[fm]; 1974 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1975 lnk[fm] = i; 1976 lnk_lvl[i] = 0; 1977 nzi++; dcount++; 1978 } 1979 1980 /* add pivot rows into the active row */ 1981 nzbd = 0; 1982 prow = lnk[n]; 1983 while (prow < i) { 1984 nnz = bdiag[prow]; 1985 cols = bj_ptr[prow] + nnz + 1; 1986 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1987 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1988 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1989 nzi += nlnk; 1990 prow = lnk[prow]; 1991 nzbd++; 1992 } 1993 bdiag[i] = nzbd; 1994 bi[i+1] = bi[i] + nzi; 1995 1996 /* if free space is not available, make more free space */ 1997 if (current_space->local_remaining<nzi) { 1998 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1999 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 2000 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 2001 reallocs++; 2002 } 2003 2004 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2005 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2006 bj_ptr[i] = current_space->array; 2007 bjlvl_ptr[i] = current_space_lvl->array; 2008 2009 /* make sure the active row i has diagonal entry */ 2010 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 2011 2012 current_space->array += nzi; 2013 current_space->local_used += nzi; 2014 current_space->local_remaining -= nzi; 2015 current_space_lvl->array += nzi; 2016 current_space_lvl->local_used += nzi; 2017 current_space_lvl->local_remaining -= nzi; 2018 } 2019 2020 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2021 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2022 2023 /* destroy list of free space and other temporary arrays */ 2024 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 2025 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */ 2026 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2027 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2028 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 2029 2030 #if defined(PETSC_USE_INFO) 2031 { 2032 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2033 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 2034 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2035 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 2036 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2037 if (diagonal_fill) { 2038 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 2039 } 2040 } 2041 #endif 2042 2043 /* put together the new matrix */ 2044 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 2045 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 2046 b = (Mat_SeqAIJ*)(fact)->data; 2047 b->free_a = PETSC_TRUE; 2048 b->free_ij = PETSC_TRUE; 2049 b->singlemalloc = PETSC_FALSE; 2050 ierr = PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 2051 b->j = bj; 2052 b->i = bi; 2053 for (i=0; i<n; i++) bdiag[i] += bi[i]; 2054 b->diag = bdiag; 2055 b->ilen = 0; 2056 b->imax = 0; 2057 b->row = isrow; 2058 b->col = iscol; 2059 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2060 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2061 b->icol = isicol; 2062 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2063 /* In b structure: Free imax, ilen, old a, old j. 2064 Allocate bdiag, solve_work, new a, new j */ 2065 ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 2066 b->maxnz = b->nz = bi[n] ; 2067 (fact)->info.factor_mallocs = reallocs; 2068 (fact)->info.fill_ratio_given = f; 2069 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2070 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2071 if (a->inode.size) { 2072 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2073 } 2074 PetscFunctionReturn(0); 2075 } 2076 2077 #undef __FUNCT__ 2078 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 2079 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 2080 { 2081 Mat C = B; 2082 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2083 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2084 IS ip=b->row,iip = b->icol; 2085 PetscErrorCode ierr; 2086 const PetscInt *rip,*riip; 2087 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 2088 PetscInt *ai=a->i,*aj=a->j; 2089 PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 2090 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2091 PetscBool perm_identity; 2092 FactorShiftCtx sctx; 2093 PetscReal rs; 2094 MatScalar d,*v; 2095 2096 PetscFunctionBegin; 2097 /* MatPivotSetUp(): initialize shift context sctx */ 2098 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2099 2100 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2101 sctx.shift_top = info->zeropivot; 2102 for (i=0; i<mbs; i++) { 2103 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2104 d = (aa)[a->diag[i]]; 2105 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2106 v = aa+ai[i]; 2107 nz = ai[i+1] - ai[i]; 2108 for (j=0; j<nz; j++) 2109 rs += PetscAbsScalar(v[j]); 2110 if (rs>sctx.shift_top) sctx.shift_top = rs; 2111 } 2112 sctx.shift_top *= 1.1; 2113 sctx.nshift_max = 5; 2114 sctx.shift_lo = 0.; 2115 sctx.shift_hi = 1.; 2116 } 2117 2118 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2119 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2120 2121 /* allocate working arrays 2122 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2123 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2124 */ 2125 ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);CHKERRQ(ierr); 2126 2127 do { 2128 sctx.newshift = PETSC_FALSE; 2129 2130 for (i=0; i<mbs; i++) c2r[i] = mbs; 2131 il[0] = 0; 2132 2133 for (k = 0; k<mbs; k++){ 2134 /* zero rtmp */ 2135 nz = bi[k+1] - bi[k]; 2136 bjtmp = bj + bi[k]; 2137 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2138 2139 /* load in initial unfactored row */ 2140 bval = ba + bi[k]; 2141 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2142 for (j = jmin; j < jmax; j++){ 2143 col = riip[aj[j]]; 2144 if (col >= k){ /* only take upper triangular entry */ 2145 rtmp[col] = aa[j]; 2146 *bval++ = 0.0; /* for in-place factorization */ 2147 } 2148 } 2149 /* shift the diagonal of the matrix: ZeropivotApply() */ 2150 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2151 2152 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2153 dk = rtmp[k]; 2154 i = c2r[k]; /* first row to be added to k_th row */ 2155 2156 while (i < k){ 2157 nexti = c2r[i]; /* next row to be added to k_th row */ 2158 2159 /* compute multiplier, update diag(k) and U(i,k) */ 2160 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2161 uikdi = - ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 2162 dk += uikdi*ba[ili]; /* update diag[k] */ 2163 ba[ili] = uikdi; /* -U(i,k) */ 2164 2165 /* add multiple of row i to k-th row */ 2166 jmin = ili + 1; jmax = bi[i+1]; 2167 if (jmin < jmax){ 2168 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2169 /* update il and c2r for row i */ 2170 il[i] = jmin; 2171 j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 2172 } 2173 i = nexti; 2174 } 2175 2176 /* copy data into U(k,:) */ 2177 rs = 0.0; 2178 jmin = bi[k]; jmax = bi[k+1]-1; 2179 if (jmin < jmax) { 2180 for (j=jmin; j<jmax; j++){ 2181 col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]); 2182 } 2183 /* add the k-th row into il and c2r */ 2184 il[k] = jmin; 2185 i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 2186 } 2187 2188 /* MatPivotCheck() */ 2189 sctx.rs = rs; 2190 sctx.pv = dk; 2191 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 2192 if(sctx.newshift) break; 2193 dk = sctx.pv; 2194 2195 ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 2196 } 2197 } while (sctx.newshift); 2198 2199 ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr); 2200 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2201 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2202 2203 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2204 if (perm_identity){ 2205 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2206 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2207 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2208 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2209 } else { 2210 B->ops->solve = MatSolve_SeqSBAIJ_1; 2211 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2212 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2213 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2214 } 2215 2216 C->assembled = PETSC_TRUE; 2217 C->preallocated = PETSC_TRUE; 2218 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2219 2220 /* MatPivotView() */ 2221 if (sctx.nshift){ 2222 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2223 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 2224 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2225 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2226 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){ 2227 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr); 2228 } 2229 } 2230 PetscFunctionReturn(0); 2231 } 2232 2233 #undef __FUNCT__ 2234 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace" 2235 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 2236 { 2237 Mat C = B; 2238 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2239 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2240 IS ip=b->row,iip = b->icol; 2241 PetscErrorCode ierr; 2242 const PetscInt *rip,*riip; 2243 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp; 2244 PetscInt *ai=a->i,*aj=a->j; 2245 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 2246 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2247 PetscBool perm_identity; 2248 FactorShiftCtx sctx; 2249 PetscReal rs; 2250 MatScalar d,*v; 2251 2252 PetscFunctionBegin; 2253 /* MatPivotSetUp(): initialize shift context sctx */ 2254 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2255 2256 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2257 sctx.shift_top = info->zeropivot; 2258 for (i=0; i<mbs; i++) { 2259 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2260 d = (aa)[a->diag[i]]; 2261 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2262 v = aa+ai[i]; 2263 nz = ai[i+1] - ai[i]; 2264 for (j=0; j<nz; j++) 2265 rs += PetscAbsScalar(v[j]); 2266 if (rs>sctx.shift_top) sctx.shift_top = rs; 2267 } 2268 sctx.shift_top *= 1.1; 2269 sctx.nshift_max = 5; 2270 sctx.shift_lo = 0.; 2271 sctx.shift_hi = 1.; 2272 } 2273 2274 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2275 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2276 2277 /* initialization */ 2278 ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); 2279 2280 do { 2281 sctx.newshift = PETSC_FALSE; 2282 2283 for (i=0; i<mbs; i++) jl[i] = mbs; 2284 il[0] = 0; 2285 2286 for (k = 0; k<mbs; k++){ 2287 /* zero rtmp */ 2288 nz = bi[k+1] - bi[k]; 2289 bjtmp = bj + bi[k]; 2290 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2291 2292 bval = ba + bi[k]; 2293 /* initialize k-th row by the perm[k]-th row of A */ 2294 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2295 for (j = jmin; j < jmax; j++){ 2296 col = riip[aj[j]]; 2297 if (col >= k){ /* only take upper triangular entry */ 2298 rtmp[col] = aa[j]; 2299 *bval++ = 0.0; /* for in-place factorization */ 2300 } 2301 } 2302 /* shift the diagonal of the matrix */ 2303 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2304 2305 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2306 dk = rtmp[k]; 2307 i = jl[k]; /* first row to be added to k_th row */ 2308 2309 while (i < k){ 2310 nexti = jl[i]; /* next row to be added to k_th row */ 2311 2312 /* compute multiplier, update diag(k) and U(i,k) */ 2313 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2314 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 2315 dk += uikdi*ba[ili]; 2316 ba[ili] = uikdi; /* -U(i,k) */ 2317 2318 /* add multiple of row i to k-th row */ 2319 jmin = ili + 1; jmax = bi[i+1]; 2320 if (jmin < jmax){ 2321 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2322 /* update il and jl for row i */ 2323 il[i] = jmin; 2324 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 2325 } 2326 i = nexti; 2327 } 2328 2329 /* shift the diagonals when zero pivot is detected */ 2330 /* compute rs=sum of abs(off-diagonal) */ 2331 rs = 0.0; 2332 jmin = bi[k]+1; 2333 nz = bi[k+1] - jmin; 2334 bcol = bj + jmin; 2335 for (j=0; j<nz; j++) { 2336 rs += PetscAbsScalar(rtmp[bcol[j]]); 2337 } 2338 2339 sctx.rs = rs; 2340 sctx.pv = dk; 2341 ierr = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr); 2342 if (sctx.newshift) break; 2343 dk = sctx.pv; 2344 2345 /* copy data into U(k,:) */ 2346 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 2347 jmin = bi[k]+1; jmax = bi[k+1]; 2348 if (jmin < jmax) { 2349 for (j=jmin; j<jmax; j++){ 2350 col = bj[j]; ba[j] = rtmp[col]; 2351 } 2352 /* add the k-th row into il and jl */ 2353 il[k] = jmin; 2354 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 2355 } 2356 } 2357 } while (sctx.newshift); 2358 2359 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 2360 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2361 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2362 2363 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2364 if (perm_identity){ 2365 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2366 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2367 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2368 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2369 } else { 2370 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2371 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2372 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2373 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2374 } 2375 2376 C->assembled = PETSC_TRUE; 2377 C->preallocated = PETSC_TRUE; 2378 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2379 if (sctx.nshift){ 2380 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2381 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2382 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2383 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2384 } 2385 } 2386 PetscFunctionReturn(0); 2387 } 2388 2389 /* 2390 icc() under revised new data structure. 2391 Factored arrays bj and ba are stored as 2392 U(0,:),...,U(i,:),U(n-1,:) 2393 2394 ui=fact->i is an array of size n+1, in which 2395 ui+ 2396 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2397 ui[n]: points to U(n-1,n-1)+1 2398 2399 udiag=fact->diag is an array of size n,in which 2400 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2401 2402 U(i,:) contains udiag[i] as its last entry, i.e., 2403 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2404 */ 2405 2406 #undef __FUNCT__ 2407 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 2408 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2409 { 2410 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2411 Mat_SeqSBAIJ *b; 2412 PetscErrorCode ierr; 2413 PetscBool perm_identity,missing; 2414 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2415 const PetscInt *rip,*riip; 2416 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2417 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 2418 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2419 PetscReal fill=info->fill,levels=info->levels; 2420 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2421 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2422 PetscBT lnkbt; 2423 IS iperm; 2424 2425 PetscFunctionBegin; 2426 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2427 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2428 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2429 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2430 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2431 2432 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2433 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2434 ui[0] = 0; 2435 2436 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2437 if (!levels && perm_identity) { 2438 for (i=0; i<am; i++) { 2439 ncols = ai[i+1] - a->diag[i]; 2440 ui[i+1] = ui[i] + ncols; 2441 udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 2442 } 2443 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2444 cols = uj; 2445 for (i=0; i<am; i++) { 2446 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2447 ncols = ai[i+1] - a->diag[i] -1; 2448 for (j=0; j<ncols; j++) *cols++ = aj[j]; 2449 *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */ 2450 } 2451 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2452 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2453 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2454 2455 /* initialization */ 2456 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 2457 2458 /* jl: linked list for storing indices of the pivot rows 2459 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2460 ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr); 2461 for (i=0; i<am; i++){ 2462 jl[i] = am; il[i] = 0; 2463 } 2464 2465 /* create and initialize a linked list for storing column indices of the active row k */ 2466 nlnk = am + 1; 2467 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2468 2469 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2470 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2471 current_space = free_space; 2472 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr); 2473 current_space_lvl = free_space_lvl; 2474 2475 for (k=0; k<am; k++){ /* for each active row k */ 2476 /* initialize lnk by the column indices of row rip[k] of A */ 2477 nzk = 0; 2478 ncols = ai[rip[k]+1] - ai[rip[k]]; 2479 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2480 ncols_upper = 0; 2481 for (j=0; j<ncols; j++){ 2482 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2483 if (riip[i] >= k){ /* only take upper triangular entry */ 2484 ajtmp[ncols_upper] = i; 2485 ncols_upper++; 2486 } 2487 } 2488 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2489 nzk += nlnk; 2490 2491 /* update lnk by computing fill-in for each pivot row to be merged in */ 2492 prow = jl[k]; /* 1st pivot row */ 2493 2494 while (prow < k){ 2495 nextprow = jl[prow]; 2496 2497 /* merge prow into k-th row */ 2498 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2499 jmax = ui[prow+1]; 2500 ncols = jmax-jmin; 2501 i = jmin - ui[prow]; 2502 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2503 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2504 j = *(uj - 1); 2505 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2506 nzk += nlnk; 2507 2508 /* update il and jl for prow */ 2509 if (jmin < jmax){ 2510 il[prow] = jmin; 2511 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2512 } 2513 prow = nextprow; 2514 } 2515 2516 /* if free space is not available, make more free space */ 2517 if (current_space->local_remaining<nzk) { 2518 i = am - k + 1; /* num of unfactored rows */ 2519 i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2520 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2521 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2522 reallocs++; 2523 } 2524 2525 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2526 if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2527 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2528 2529 /* add the k-th row into il and jl */ 2530 if (nzk > 1){ 2531 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2532 jl[k] = jl[i]; jl[i] = k; 2533 il[k] = ui[k] + 1; 2534 } 2535 uj_ptr[k] = current_space->array; 2536 uj_lvl_ptr[k] = current_space_lvl->array; 2537 2538 current_space->array += nzk; 2539 current_space->local_used += nzk; 2540 current_space->local_remaining -= nzk; 2541 2542 current_space_lvl->array += nzk; 2543 current_space_lvl->local_used += nzk; 2544 current_space_lvl->local_remaining -= nzk; 2545 2546 ui[k+1] = ui[k] + nzk; 2547 } 2548 2549 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2550 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2551 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2552 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2553 2554 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2555 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2556 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2557 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2558 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2559 2560 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2561 2562 /* put together the new matrix in MATSEQSBAIJ format */ 2563 b = (Mat_SeqSBAIJ*)(fact)->data; 2564 b->singlemalloc = PETSC_FALSE; 2565 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2566 b->j = uj; 2567 b->i = ui; 2568 b->diag = udiag; 2569 b->free_diag = PETSC_TRUE; 2570 b->ilen = 0; 2571 b->imax = 0; 2572 b->row = perm; 2573 b->col = perm; 2574 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2575 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2576 b->icol = iperm; 2577 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2578 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2579 ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2580 b->maxnz = b->nz = ui[am]; 2581 b->free_a = PETSC_TRUE; 2582 b->free_ij = PETSC_TRUE; 2583 2584 fact->info.factor_mallocs = reallocs; 2585 fact->info.fill_ratio_given = fill; 2586 if (ai[am] != 0) { 2587 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2588 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2589 } else { 2590 fact->info.fill_ratio_needed = 0.0; 2591 } 2592 #if defined(PETSC_USE_INFO) 2593 if (ai[am] != 0) { 2594 PetscReal af = fact->info.fill_ratio_needed; 2595 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2596 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2597 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2598 } else { 2599 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2600 } 2601 #endif 2602 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2603 PetscFunctionReturn(0); 2604 } 2605 2606 #undef __FUNCT__ 2607 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace" 2608 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2609 { 2610 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2611 Mat_SeqSBAIJ *b; 2612 PetscErrorCode ierr; 2613 PetscBool perm_identity,missing; 2614 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2615 const PetscInt *rip,*riip; 2616 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2617 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 2618 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2619 PetscReal fill=info->fill,levels=info->levels; 2620 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2621 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2622 PetscBT lnkbt; 2623 IS iperm; 2624 2625 PetscFunctionBegin; 2626 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2627 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2628 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2629 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2630 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2631 2632 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2633 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2634 ui[0] = 0; 2635 2636 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2637 if (!levels && perm_identity) { 2638 2639 for (i=0; i<am; i++) { 2640 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 2641 udiag[i] = ui[i]; 2642 } 2643 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2644 cols = uj; 2645 for (i=0; i<am; i++) { 2646 aj = a->j + a->diag[i]; 2647 ncols = ui[i+1] - ui[i]; 2648 for (j=0; j<ncols; j++) *cols++ = *aj++; 2649 } 2650 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2651 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2652 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2653 2654 /* initialization */ 2655 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 2656 2657 /* jl: linked list for storing indices of the pivot rows 2658 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2659 ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr); 2660 for (i=0; i<am; i++){ 2661 jl[i] = am; il[i] = 0; 2662 } 2663 2664 /* create and initialize a linked list for storing column indices of the active row k */ 2665 nlnk = am + 1; 2666 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2667 2668 /* initial FreeSpace size is fill*(ai[am]+1) */ 2669 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2670 current_space = free_space; 2671 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2672 current_space_lvl = free_space_lvl; 2673 2674 for (k=0; k<am; k++){ /* for each active row k */ 2675 /* initialize lnk by the column indices of row rip[k] of A */ 2676 nzk = 0; 2677 ncols = ai[rip[k]+1] - ai[rip[k]]; 2678 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2679 ncols_upper = 0; 2680 for (j=0; j<ncols; j++){ 2681 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2682 if (riip[i] >= k){ /* only take upper triangular entry */ 2683 ajtmp[ncols_upper] = i; 2684 ncols_upper++; 2685 } 2686 } 2687 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2688 nzk += nlnk; 2689 2690 /* update lnk by computing fill-in for each pivot row to be merged in */ 2691 prow = jl[k]; /* 1st pivot row */ 2692 2693 while (prow < k){ 2694 nextprow = jl[prow]; 2695 2696 /* merge prow into k-th row */ 2697 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2698 jmax = ui[prow+1]; 2699 ncols = jmax-jmin; 2700 i = jmin - ui[prow]; 2701 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2702 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2703 j = *(uj - 1); 2704 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2705 nzk += nlnk; 2706 2707 /* update il and jl for prow */ 2708 if (jmin < jmax){ 2709 il[prow] = jmin; 2710 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2711 } 2712 prow = nextprow; 2713 } 2714 2715 /* if free space is not available, make more free space */ 2716 if (current_space->local_remaining<nzk) { 2717 i = am - k + 1; /* num of unfactored rows */ 2718 i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2719 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2720 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2721 reallocs++; 2722 } 2723 2724 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2725 if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2726 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2727 2728 /* add the k-th row into il and jl */ 2729 if (nzk > 1){ 2730 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2731 jl[k] = jl[i]; jl[i] = k; 2732 il[k] = ui[k] + 1; 2733 } 2734 uj_ptr[k] = current_space->array; 2735 uj_lvl_ptr[k] = current_space_lvl->array; 2736 2737 current_space->array += nzk; 2738 current_space->local_used += nzk; 2739 current_space->local_remaining -= nzk; 2740 2741 current_space_lvl->array += nzk; 2742 current_space_lvl->local_used += nzk; 2743 current_space_lvl->local_remaining -= nzk; 2744 2745 ui[k+1] = ui[k] + nzk; 2746 } 2747 2748 #if defined(PETSC_USE_INFO) 2749 if (ai[am] != 0) { 2750 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2751 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2752 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2753 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2754 } else { 2755 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2756 } 2757 #endif 2758 2759 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2760 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2761 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2762 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2763 2764 /* destroy list of free space and other temporary array(s) */ 2765 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2766 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2767 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2768 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2769 2770 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2771 2772 /* put together the new matrix in MATSEQSBAIJ format */ 2773 2774 b = (Mat_SeqSBAIJ*)fact->data; 2775 b->singlemalloc = PETSC_FALSE; 2776 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2777 b->j = uj; 2778 b->i = ui; 2779 b->diag = udiag; 2780 b->free_diag = PETSC_TRUE; 2781 b->ilen = 0; 2782 b->imax = 0; 2783 b->row = perm; 2784 b->col = perm; 2785 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2786 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2787 b->icol = iperm; 2788 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2789 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2790 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2791 b->maxnz = b->nz = ui[am]; 2792 b->free_a = PETSC_TRUE; 2793 b->free_ij = PETSC_TRUE; 2794 2795 fact->info.factor_mallocs = reallocs; 2796 fact->info.fill_ratio_given = fill; 2797 if (ai[am] != 0) { 2798 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2799 } else { 2800 fact->info.fill_ratio_needed = 0.0; 2801 } 2802 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2803 PetscFunctionReturn(0); 2804 } 2805 2806 #undef __FUNCT__ 2807 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 2808 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2809 { 2810 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2811 Mat_SeqSBAIJ *b; 2812 PetscErrorCode ierr; 2813 PetscBool perm_identity; 2814 PetscReal fill = info->fill; 2815 const PetscInt *rip,*riip; 2816 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2817 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2818 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag; 2819 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2820 PetscBT lnkbt; 2821 IS iperm; 2822 2823 PetscFunctionBegin; 2824 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2825 /* check whether perm is the identity mapping */ 2826 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2827 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2828 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2829 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2830 2831 /* initialization */ 2832 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2833 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2834 ui[0] = 0; 2835 2836 /* jl: linked list for storing indices of the pivot rows 2837 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2838 ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr); 2839 for (i=0; i<am; i++){ 2840 jl[i] = am; il[i] = 0; 2841 } 2842 2843 /* create and initialize a linked list for storing column indices of the active row k */ 2844 nlnk = am + 1; 2845 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2846 2847 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2848 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2849 current_space = free_space; 2850 2851 for (k=0; k<am; k++){ /* for each active row k */ 2852 /* initialize lnk by the column indices of row rip[k] of A */ 2853 nzk = 0; 2854 ncols = ai[rip[k]+1] - ai[rip[k]]; 2855 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2856 ncols_upper = 0; 2857 for (j=0; j<ncols; j++){ 2858 i = riip[*(aj + ai[rip[k]] + j)]; 2859 if (i >= k){ /* only take upper triangular entry */ 2860 cols[ncols_upper] = i; 2861 ncols_upper++; 2862 } 2863 } 2864 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2865 nzk += nlnk; 2866 2867 /* update lnk by computing fill-in for each pivot row to be merged in */ 2868 prow = jl[k]; /* 1st pivot row */ 2869 2870 while (prow < k){ 2871 nextprow = jl[prow]; 2872 /* merge prow into k-th row */ 2873 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2874 jmax = ui[prow+1]; 2875 ncols = jmax-jmin; 2876 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2877 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2878 nzk += nlnk; 2879 2880 /* update il and jl for prow */ 2881 if (jmin < jmax){ 2882 il[prow] = jmin; 2883 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 2884 } 2885 prow = nextprow; 2886 } 2887 2888 /* if free space is not available, make more free space */ 2889 if (current_space->local_remaining<nzk) { 2890 i = am - k + 1; /* num of unfactored rows */ 2891 i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2892 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2893 reallocs++; 2894 } 2895 2896 /* copy data into free space, then initialize lnk */ 2897 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 2898 2899 /* add the k-th row into il and jl */ 2900 if (nzk > 1){ 2901 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2902 jl[k] = jl[i]; jl[i] = k; 2903 il[k] = ui[k] + 1; 2904 } 2905 ui_ptr[k] = current_space->array; 2906 current_space->array += nzk; 2907 current_space->local_used += nzk; 2908 current_space->local_remaining -= nzk; 2909 2910 ui[k+1] = ui[k] + nzk; 2911 } 2912 2913 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2914 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2915 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 2916 2917 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2918 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2919 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2920 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2921 2922 /* put together the new matrix in MATSEQSBAIJ format */ 2923 2924 b = (Mat_SeqSBAIJ*)fact->data; 2925 b->singlemalloc = PETSC_FALSE; 2926 b->free_a = PETSC_TRUE; 2927 b->free_ij = PETSC_TRUE; 2928 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2929 b->j = uj; 2930 b->i = ui; 2931 b->diag = udiag; 2932 b->free_diag = PETSC_TRUE; 2933 b->ilen = 0; 2934 b->imax = 0; 2935 b->row = perm; 2936 b->col = perm; 2937 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2938 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2939 b->icol = iperm; 2940 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2941 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2942 ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2943 b->maxnz = b->nz = ui[am]; 2944 2945 fact->info.factor_mallocs = reallocs; 2946 fact->info.fill_ratio_given = fill; 2947 if (ai[am] != 0) { 2948 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2949 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2950 } else { 2951 fact->info.fill_ratio_needed = 0.0; 2952 } 2953 #if defined(PETSC_USE_INFO) 2954 if (ai[am] != 0) { 2955 PetscReal af = fact->info.fill_ratio_needed; 2956 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2957 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2958 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2959 } else { 2960 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2961 } 2962 #endif 2963 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2964 PetscFunctionReturn(0); 2965 } 2966 2967 #undef __FUNCT__ 2968 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace" 2969 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2970 { 2971 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2972 Mat_SeqSBAIJ *b; 2973 PetscErrorCode ierr; 2974 PetscBool perm_identity; 2975 PetscReal fill = info->fill; 2976 const PetscInt *rip,*riip; 2977 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2978 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2979 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 2980 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2981 PetscBT lnkbt; 2982 IS iperm; 2983 2984 PetscFunctionBegin; 2985 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2986 /* check whether perm is the identity mapping */ 2987 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2988 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2989 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2990 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2991 2992 /* initialization */ 2993 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2994 ui[0] = 0; 2995 2996 /* jl: linked list for storing indices of the pivot rows 2997 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2998 ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr); 2999 for (i=0; i<am; i++){ 3000 jl[i] = am; il[i] = 0; 3001 } 3002 3003 /* create and initialize a linked list for storing column indices of the active row k */ 3004 nlnk = am + 1; 3005 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3006 3007 /* initial FreeSpace size is fill*(ai[am]+1) */ 3008 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 3009 current_space = free_space; 3010 3011 for (k=0; k<am; k++){ /* for each active row k */ 3012 /* initialize lnk by the column indices of row rip[k] of A */ 3013 nzk = 0; 3014 ncols = ai[rip[k]+1] - ai[rip[k]]; 3015 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 3016 ncols_upper = 0; 3017 for (j=0; j<ncols; j++){ 3018 i = riip[*(aj + ai[rip[k]] + j)]; 3019 if (i >= k){ /* only take upper triangular entry */ 3020 cols[ncols_upper] = i; 3021 ncols_upper++; 3022 } 3023 } 3024 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3025 nzk += nlnk; 3026 3027 /* update lnk by computing fill-in for each pivot row to be merged in */ 3028 prow = jl[k]; /* 1st pivot row */ 3029 3030 while (prow < k){ 3031 nextprow = jl[prow]; 3032 /* merge prow into k-th row */ 3033 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3034 jmax = ui[prow+1]; 3035 ncols = jmax-jmin; 3036 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3037 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3038 nzk += nlnk; 3039 3040 /* update il and jl for prow */ 3041 if (jmin < jmax){ 3042 il[prow] = jmin; 3043 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 3044 } 3045 prow = nextprow; 3046 } 3047 3048 /* if free space is not available, make more free space */ 3049 if (current_space->local_remaining<nzk) { 3050 i = am - k + 1; /* num of unfactored rows */ 3051 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3052 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 3053 reallocs++; 3054 } 3055 3056 /* copy data into free space, then initialize lnk */ 3057 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3058 3059 /* add the k-th row into il and jl */ 3060 if (nzk-1 > 0){ 3061 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3062 jl[k] = jl[i]; jl[i] = k; 3063 il[k] = ui[k] + 1; 3064 } 3065 ui_ptr[k] = current_space->array; 3066 current_space->array += nzk; 3067 current_space->local_used += nzk; 3068 current_space->local_remaining -= nzk; 3069 3070 ui[k+1] = ui[k] + nzk; 3071 } 3072 3073 #if defined(PETSC_USE_INFO) 3074 if (ai[am] != 0) { 3075 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 3076 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 3077 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 3078 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 3079 } else { 3080 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 3081 } 3082 #endif 3083 3084 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 3085 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 3086 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 3087 3088 /* destroy list of free space and other temporary array(s) */ 3089 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 3090 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 3091 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3092 3093 /* put together the new matrix in MATSEQSBAIJ format */ 3094 3095 b = (Mat_SeqSBAIJ*)fact->data; 3096 b->singlemalloc = PETSC_FALSE; 3097 b->free_a = PETSC_TRUE; 3098 b->free_ij = PETSC_TRUE; 3099 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 3100 b->j = uj; 3101 b->i = ui; 3102 b->diag = 0; 3103 b->ilen = 0; 3104 b->imax = 0; 3105 b->row = perm; 3106 b->col = perm; 3107 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3108 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3109 b->icol = iperm; 3110 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3111 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3112 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3113 b->maxnz = b->nz = ui[am]; 3114 3115 fact->info.factor_mallocs = reallocs; 3116 fact->info.fill_ratio_given = fill; 3117 if (ai[am] != 0) { 3118 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 3119 } else { 3120 fact->info.fill_ratio_needed = 0.0; 3121 } 3122 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3123 PetscFunctionReturn(0); 3124 } 3125 3126 #undef __FUNCT__ 3127 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 3128 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 3129 { 3130 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3131 PetscErrorCode ierr; 3132 PetscInt n = A->rmap->n; 3133 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 3134 PetscScalar *x,sum; 3135 const PetscScalar *b; 3136 const MatScalar *aa = a->a,*v; 3137 PetscInt i,nz; 3138 3139 PetscFunctionBegin; 3140 if (!n) PetscFunctionReturn(0); 3141 3142 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3143 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3144 3145 /* forward solve the lower triangular */ 3146 x[0] = b[0]; 3147 v = aa; 3148 vi = aj; 3149 for (i=1; i<n; i++) { 3150 nz = ai[i+1] - ai[i]; 3151 sum = b[i]; 3152 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3153 v += nz; 3154 vi += nz; 3155 x[i] = sum; 3156 } 3157 3158 /* backward solve the upper triangular */ 3159 for (i=n-1; i>=0; i--){ 3160 v = aa + adiag[i+1] + 1; 3161 vi = aj + adiag[i+1] + 1; 3162 nz = adiag[i] - adiag[i+1]-1; 3163 sum = x[i]; 3164 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3165 x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3166 } 3167 3168 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 3169 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3170 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3171 PetscFunctionReturn(0); 3172 } 3173 3174 #undef __FUNCT__ 3175 #define __FUNCT__ "MatSolve_SeqAIJ" 3176 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 3177 { 3178 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3179 IS iscol = a->col,isrow = a->row; 3180 PetscErrorCode ierr; 3181 PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz; 3182 const PetscInt *rout,*cout,*r,*c; 3183 PetscScalar *x,*tmp,sum; 3184 const PetscScalar *b; 3185 const MatScalar *aa = a->a,*v; 3186 3187 PetscFunctionBegin; 3188 if (!n) PetscFunctionReturn(0); 3189 3190 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3191 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3192 tmp = a->solve_work; 3193 3194 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 3195 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 3196 3197 /* forward solve the lower triangular */ 3198 tmp[0] = b[r[0]]; 3199 v = aa; 3200 vi = aj; 3201 for (i=1; i<n; i++) { 3202 nz = ai[i+1] - ai[i]; 3203 sum = b[r[i]]; 3204 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3205 tmp[i] = sum; 3206 v += nz; vi += nz; 3207 } 3208 3209 /* backward solve the upper triangular */ 3210 for (i=n-1; i>=0; i--){ 3211 v = aa + adiag[i+1]+1; 3212 vi = aj + adiag[i+1]+1; 3213 nz = adiag[i]-adiag[i+1]-1; 3214 sum = tmp[i]; 3215 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3216 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 3217 } 3218 3219 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 3220 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 3221 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3222 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3223 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 3224 PetscFunctionReturn(0); 3225 } 3226 3227 #undef __FUNCT__ 3228 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 3229 /* 3230 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3231 */ 3232 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 3233 { 3234 Mat B = *fact; 3235 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 3236 IS isicol; 3237 PetscErrorCode ierr; 3238 const PetscInt *r,*ic; 3239 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 3240 PetscInt *bi,*bj,*bdiag,*bdiag_rev; 3241 PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au; 3242 PetscInt nlnk,*lnk; 3243 PetscBT lnkbt; 3244 PetscBool row_identity,icol_identity; 3245 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp; 3246 const PetscInt *ics; 3247 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 3248 PetscReal dt=info->dt,dtcol=info->dtcol,shift=info->shiftamount; 3249 PetscInt dtcount=(PetscInt)info->dtcount,nnz_max; 3250 PetscBool missing; 3251 3252 PetscFunctionBegin; 3253 3254 if (dt == PETSC_DEFAULT) dt = 0.005; 3255 if (dtcol == PETSC_DEFAULT) dtcol = 0.01; /* XXX unused! */ 3256 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); 3257 3258 /* ------- symbolic factorization, can be reused ---------*/ 3259 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 3260 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 3261 adiag=a->diag; 3262 3263 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 3264 3265 /* bdiag is location of diagonal in factor */ 3266 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); /* becomes b->diag */ 3267 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev);CHKERRQ(ierr); /* temporary */ 3268 3269 /* allocate row pointers bi */ 3270 ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3271 3272 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3273 if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */ 3274 nnz_max = ai[n]+2*n*dtcount+2; 3275 3276 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3277 ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr); 3278 3279 /* put together the new matrix */ 3280 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 3281 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 3282 b = (Mat_SeqAIJ*)B->data; 3283 b->free_a = PETSC_TRUE; 3284 b->free_ij = PETSC_TRUE; 3285 b->singlemalloc = PETSC_FALSE; 3286 b->a = ba; 3287 b->j = bj; 3288 b->i = bi; 3289 b->diag = bdiag; 3290 b->ilen = 0; 3291 b->imax = 0; 3292 b->row = isrow; 3293 b->col = iscol; 3294 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3295 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3296 b->icol = isicol; 3297 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3298 3299 ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3300 b->maxnz = nnz_max; 3301 3302 B->factortype = MAT_FACTOR_ILUDT; 3303 B->info.factor_mallocs = 0; 3304 B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 3305 CHKMEMQ; 3306 /* ------- end of symbolic factorization ---------*/ 3307 3308 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3309 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3310 ics = ic; 3311 3312 /* linked list for storing column indices of the active row */ 3313 nlnk = n + 1; 3314 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3315 3316 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3317 ierr = PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);CHKERRQ(ierr); 3318 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3319 ierr = PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);CHKERRQ(ierr); 3320 ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr); 3321 3322 bi[0] = 0; 3323 bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */ 3324 bdiag_rev[n] = bdiag[0]; 3325 bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */ 3326 for (i=0; i<n; i++) { 3327 /* copy initial fill into linked list */ 3328 nzi = 0; /* nonzeros for active row i */ 3329 nzi = ai[r[i]+1] - ai[r[i]]; 3330 if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 3331 nzi_al = adiag[r[i]] - ai[r[i]]; 3332 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 3333 ajtmp = aj + ai[r[i]]; 3334 ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3335 3336 /* load in initial (unfactored row) */ 3337 aatmp = a->a + ai[r[i]]; 3338 for (j=0; j<nzi; j++) { 3339 rtmp[ics[*ajtmp++]] = *aatmp++; 3340 } 3341 3342 /* add pivot rows into linked list */ 3343 row = lnk[n]; 3344 while (row < i ) { 3345 nzi_bl = bi[row+1] - bi[row] + 1; 3346 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 3347 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 3348 nzi += nlnk; 3349 row = lnk[row]; 3350 } 3351 3352 /* copy data from lnk into jtmp, then initialize lnk */ 3353 ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 3354 3355 /* numerical factorization */ 3356 bjtmp = jtmp; 3357 row = *bjtmp++; /* 1st pivot row */ 3358 while ( row < i ) { 3359 pc = rtmp + row; 3360 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3361 multiplier = (*pc) * (*pv); 3362 *pc = multiplier; 3363 if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */ 3364 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3365 pv = ba + bdiag[row+1] + 1; 3366 /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */ 3367 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3368 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3369 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 3370 } 3371 row = *bjtmp++; 3372 } 3373 3374 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3375 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3376 nzi_bl = 0; j = 0; 3377 while (jtmp[j] < i){ /* Note: jtmp is sorted */ 3378 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3379 nzi_bl++; j++; 3380 } 3381 nzi_bu = nzi - nzi_bl -1; 3382 while (j < nzi){ 3383 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3384 j++; 3385 } 3386 3387 bjtmp = bj + bi[i]; 3388 batmp = ba + bi[i]; 3389 /* apply level dropping rule to L part */ 3390 ncut = nzi_al + dtcount; 3391 if (ncut < nzi_bl){ 3392 ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 3393 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 3394 } else { 3395 ncut = nzi_bl; 3396 } 3397 for (j=0; j<ncut; j++){ 3398 bjtmp[j] = jtmp[j]; 3399 batmp[j] = vtmp[j]; 3400 /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */ 3401 } 3402 bi[i+1] = bi[i] + ncut; 3403 nzi = ncut + 1; 3404 3405 /* apply level dropping rule to U part */ 3406 ncut = nzi_au + dtcount; 3407 if (ncut < nzi_bu){ 3408 ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 3409 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 3410 } else { 3411 ncut = nzi_bu; 3412 } 3413 nzi += ncut; 3414 3415 /* mark bdiagonal */ 3416 bdiag[i+1] = bdiag[i] - (ncut + 1); 3417 bdiag_rev[n-i-1] = bdiag[i+1]; 3418 bi[2*n - i] = bi[2*n - i +1] - (ncut + 1); 3419 bjtmp = bj + bdiag[i]; 3420 batmp = ba + bdiag[i]; 3421 *bjtmp = i; 3422 *batmp = diag_tmp; /* rtmp[i]; */ 3423 if (*batmp == 0.0) { 3424 *batmp = dt+shift; 3425 /* printf(" row %d add shift %g\n",i,shift); */ 3426 } 3427 *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */ 3428 /* printf(" (%d,%g),",*bjtmp,*batmp); */ 3429 3430 bjtmp = bj + bdiag[i+1]+1; 3431 batmp = ba + bdiag[i+1]+1; 3432 for (k=0; k<ncut; k++){ 3433 bjtmp[k] = jtmp[nzi_bl+1+k]; 3434 batmp[k] = vtmp[nzi_bl+1+k]; 3435 /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */ 3436 } 3437 /* printf("\n"); */ 3438 3439 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3440 /* 3441 printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]); 3442 printf(" ----------------------------\n"); 3443 */ 3444 } /* for (i=0; i<n; i++) */ 3445 /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */ 3446 if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 3447 3448 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3449 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3450 3451 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3452 ierr = PetscFree2(im,jtmp);CHKERRQ(ierr); 3453 ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr); 3454 ierr = PetscFree(bdiag_rev);CHKERRQ(ierr); 3455 3456 ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 3457 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3458 3459 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3460 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 3461 if (row_identity && icol_identity) { 3462 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3463 } else { 3464 B->ops->solve = MatSolve_SeqAIJ; 3465 } 3466 3467 B->ops->solveadd = 0; 3468 B->ops->solvetranspose = 0; 3469 B->ops->solvetransposeadd = 0; 3470 B->ops->matsolve = 0; 3471 B->assembled = PETSC_TRUE; 3472 B->preallocated = PETSC_TRUE; 3473 PetscFunctionReturn(0); 3474 } 3475 3476 /* a wraper of MatILUDTFactor_SeqAIJ() */ 3477 #undef __FUNCT__ 3478 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ" 3479 /* 3480 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3481 */ 3482 3483 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 3484 { 3485 PetscErrorCode ierr; 3486 3487 PetscFunctionBegin; 3488 ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr); 3489 PetscFunctionReturn(0); 3490 } 3491 3492 /* 3493 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3494 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3495 */ 3496 #undef __FUNCT__ 3497 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ" 3498 /* 3499 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3500 */ 3501 3502 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 3503 { 3504 Mat C=fact; 3505 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 3506 IS isrow = b->row,isicol = b->icol; 3507 PetscErrorCode ierr; 3508 const PetscInt *r,*ic,*ics; 3509 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 3510 PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 3511 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 3512 PetscReal dt=info->dt,shift=info->shiftamount; 3513 PetscBool row_identity, col_identity; 3514 3515 PetscFunctionBegin; 3516 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3517 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3518 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 3519 ics = ic; 3520 3521 for (i=0; i<n; i++){ 3522 /* initialize rtmp array */ 3523 nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 3524 bjtmp = bj + bi[i]; 3525 for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 3526 rtmp[i] = 0.0; 3527 nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 3528 bjtmp = bj + bdiag[i+1] + 1; 3529 for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 3530 3531 /* load in initial unfactored row of A */ 3532 /* printf("row %d\n",i); */ 3533 nz = ai[r[i]+1] - ai[r[i]]; 3534 ajtmp = aj + ai[r[i]]; 3535 v = aa + ai[r[i]]; 3536 for (j=0; j<nz; j++) { 3537 rtmp[ics[*ajtmp++]] = v[j]; 3538 /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */ 3539 } 3540 /* printf("\n"); */ 3541 3542 /* numerical factorization */ 3543 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3544 nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 3545 k = 0; 3546 while (k < nzl){ 3547 row = *bjtmp++; 3548 /* printf(" prow %d\n",row); */ 3549 pc = rtmp + row; 3550 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3551 multiplier = (*pc) * (*pv); 3552 *pc = multiplier; 3553 if (PetscAbsScalar(multiplier) > dt){ 3554 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3555 pv = b->a + bdiag[row+1] + 1; 3556 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3557 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3558 /* ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); */ 3559 } 3560 k++; 3561 } 3562 3563 /* finished row so stick it into b->a */ 3564 /* L-part */ 3565 pv = b->a + bi[i] ; 3566 pj = bj + bi[i] ; 3567 nzl = bi[i+1] - bi[i]; 3568 for (j=0; j<nzl; j++) { 3569 pv[j] = rtmp[pj[j]]; 3570 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3571 } 3572 3573 /* diagonal: invert diagonal entries for simplier triangular solves */ 3574 if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 3575 b->a[bdiag[i]] = 1.0/rtmp[i]; 3576 /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */ 3577 3578 /* U-part */ 3579 pv = b->a + bdiag[i+1] + 1; 3580 pj = bj + bdiag[i+1] + 1; 3581 nzu = bdiag[i] - bdiag[i+1] - 1; 3582 for (j=0; j<nzu; j++) { 3583 pv[j] = rtmp[pj[j]]; 3584 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3585 } 3586 /* printf("\n"); */ 3587 } 3588 3589 ierr = PetscFree(rtmp);CHKERRQ(ierr); 3590 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3591 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3592 3593 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3594 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 3595 if (row_identity && col_identity) { 3596 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3597 } else { 3598 C->ops->solve = MatSolve_SeqAIJ; 3599 } 3600 C->ops->solveadd = 0; 3601 C->ops->solvetranspose = 0; 3602 C->ops->solvetransposeadd = 0; 3603 C->ops->matsolve = 0; 3604 C->assembled = PETSC_TRUE; 3605 C->preallocated = PETSC_TRUE; 3606 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 3607 PetscFunctionReturn(0); 3608 } 3609