1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscbt.h> 5 #include <../src/mat/utils/freespace.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ" 9 /* 10 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 11 12 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 13 */ 14 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 15 { 16 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data; 17 PetscErrorCode ierr; 18 PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order; 19 const PetscInt *ai = a->i, *aj = a->j; 20 const PetscScalar *aa = a->a; 21 PetscBool *done; 22 PetscReal best,past = 0,future; 23 24 PetscFunctionBegin; 25 /* pick initial row */ 26 best = -1; 27 for (i=0; i<n; i++) { 28 future = 0.0; 29 for (j=ai[i]; j<ai[i+1]; j++) { 30 if (aj[j] != i) future += PetscAbsScalar(aa[j]); 31 else past = PetscAbsScalar(aa[j]); 32 } 33 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 34 if (past/future > best) { 35 best = past/future; 36 current = i; 37 } 38 } 39 40 ierr = PetscMalloc1(n,&done);CHKERRQ(ierr); 41 ierr = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr); 42 ierr = PetscMalloc1(n,&order);CHKERRQ(ierr); 43 order[0] = current; 44 for (i=0; i<n-1; i++) { 45 done[current] = PETSC_TRUE; 46 best = -1; 47 /* loop over all neighbors of current pivot */ 48 for (j=ai[current]; j<ai[current+1]; j++) { 49 jj = aj[j]; 50 if (done[jj]) continue; 51 /* loop over columns of potential next row computing weights for below and above diagonal */ 52 past = future = 0.0; 53 for (k=ai[jj]; k<ai[jj+1]; k++) { 54 kk = aj[k]; 55 if (done[kk]) past += PetscAbsScalar(aa[k]); 56 else if (kk != jj) future += PetscAbsScalar(aa[k]); 57 } 58 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 59 if (past/future > best) { 60 best = past/future; 61 newcurrent = jj; 62 } 63 } 64 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 65 best = -1; 66 for (k=0; k<n; k++) { 67 if (done[k]) continue; 68 future = 0.0; 69 past = 0.0; 70 for (j=ai[k]; j<ai[k+1]; j++) { 71 kk = aj[j]; 72 if (done[kk]) past += PetscAbsScalar(aa[j]); 73 else if (kk != k) future += PetscAbsScalar(aa[j]); 74 } 75 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 76 if (past/future > best) { 77 best = past/future; 78 newcurrent = k; 79 } 80 } 81 } 82 if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current"); 83 current = newcurrent; 84 order[i+1] = current; 85 } 86 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr); 87 *icol = *irow; 88 ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr); 89 ierr = PetscFree(done);CHKERRQ(ierr); 90 ierr = PetscFree(order);CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 96 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool *flg) 97 { 98 PetscFunctionBegin; 99 *flg = PETSC_TRUE; 100 PetscFunctionReturn(0); 101 } 102 103 #undef __FUNCT__ 104 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 105 PETSC_EXTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 106 { 107 PetscInt n = A->rmap->n; 108 PetscErrorCode ierr; 109 110 PetscFunctionBegin; 111 #if defined(PETSC_USE_COMPLEX) 112 if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported"); 113 #endif 114 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 115 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 116 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 117 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 118 119 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 120 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 121 122 ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr); 123 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 124 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 125 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 126 127 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 128 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 129 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 130 (*B)->factortype = ftype; 131 PetscFunctionReturn(0); 132 } 133 134 #undef __FUNCT__ 135 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace" 136 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 137 { 138 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 139 IS isicol; 140 PetscErrorCode ierr; 141 const PetscInt *r,*ic; 142 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 143 PetscInt *bi,*bj,*ajtmp; 144 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 145 PetscReal f; 146 PetscInt nlnk,*lnk,k,**bi_ptr; 147 PetscFreeSpaceList free_space=NULL,current_space=NULL; 148 PetscBT lnkbt; 149 150 PetscFunctionBegin; 151 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 152 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 153 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 154 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 155 156 /* get new row pointers */ 157 ierr = PetscMalloc1((n+1),&bi);CHKERRQ(ierr); 158 bi[0] = 0; 159 160 /* bdiag is location of diagonal in factor */ 161 ierr = PetscMalloc1((n+1),&bdiag);CHKERRQ(ierr); 162 bdiag[0] = 0; 163 164 /* linked list for storing column indices of the active row */ 165 nlnk = n + 1; 166 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 167 168 ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr); 169 170 /* initial FreeSpace size is f*(ai[n]+1) */ 171 f = info->fill; 172 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 173 current_space = free_space; 174 175 for (i=0; i<n; i++) { 176 /* copy previous fill into linked list */ 177 nzi = 0; 178 nnz = ai[r[i]+1] - ai[r[i]]; 179 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 180 ajtmp = aj + ai[r[i]]; 181 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 182 nzi += nlnk; 183 184 /* add pivot rows into linked list */ 185 row = lnk[n]; 186 while (row < i) { 187 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 188 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 189 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 190 nzi += nlnk; 191 row = lnk[row]; 192 } 193 bi[i+1] = bi[i] + nzi; 194 im[i] = nzi; 195 196 /* mark bdiag */ 197 nzbd = 0; 198 nnz = nzi; 199 k = lnk[n]; 200 while (nnz-- && k < i) { 201 nzbd++; 202 k = lnk[k]; 203 } 204 bdiag[i] = bi[i] + nzbd; 205 206 /* if free space is not available, make more free space */ 207 if (current_space->local_remaining<nzi) { 208 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 209 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 210 reallocs++; 211 } 212 213 /* copy data into free space, then initialize lnk */ 214 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 215 216 bi_ptr[i] = current_space->array; 217 current_space->array += nzi; 218 current_space->local_used += nzi; 219 current_space->local_remaining -= nzi; 220 } 221 #if defined(PETSC_USE_INFO) 222 if (ai[n] != 0) { 223 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 224 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 225 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 226 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); 227 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 228 } else { 229 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 230 } 231 #endif 232 233 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 234 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 235 236 /* destroy list of free space and other temporary array(s) */ 237 ierr = PetscMalloc1((bi[n]+1),&bj);CHKERRQ(ierr); 238 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 239 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 240 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 241 242 /* put together the new matrix */ 243 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 244 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 245 b = (Mat_SeqAIJ*)(B)->data; 246 247 b->free_a = PETSC_TRUE; 248 b->free_ij = PETSC_TRUE; 249 b->singlemalloc = PETSC_FALSE; 250 251 ierr = PetscMalloc1((bi[n]+1),&b->a);CHKERRQ(ierr); 252 b->j = bj; 253 b->i = bi; 254 b->diag = bdiag; 255 b->ilen = 0; 256 b->imax = 0; 257 b->row = isrow; 258 b->col = iscol; 259 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 260 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 261 b->icol = isicol; 262 ierr = PetscMalloc1((n+1),&b->solve_work);CHKERRQ(ierr); 263 264 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 265 ierr = PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 266 b->maxnz = b->nz = bi[n]; 267 268 (B)->factortype = MAT_FACTOR_LU; 269 (B)->info.factor_mallocs = reallocs; 270 (B)->info.fill_ratio_given = f; 271 272 if (ai[n]) { 273 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 274 } else { 275 (B)->info.fill_ratio_needed = 0.0; 276 } 277 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 278 if (a->inode.size) { 279 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 280 } 281 PetscFunctionReturn(0); 282 } 283 284 #undef __FUNCT__ 285 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 286 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 287 { 288 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 289 IS isicol; 290 PetscErrorCode ierr; 291 const PetscInt *r,*ic,*ai=a->i,*aj=a->j,*ajtmp; 292 PetscInt i,n=A->rmap->n; 293 PetscInt *bi,*bj; 294 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 295 PetscReal f; 296 PetscInt nlnk,*lnk,k,**bi_ptr; 297 PetscFreeSpaceList free_space=NULL,current_space=NULL; 298 PetscBT lnkbt; 299 300 PetscFunctionBegin; 301 /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */ 302 /* 303 PetscBool olddatastruct=PETSC_FALSE; 304 ierr = PetscOptionsGetBool(NULL,"-lu_old",&olddatastruct,NULL);CHKERRQ(ierr); 305 if (olddatastruct) { 306 ierr = MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);CHKERRQ(ierr); 307 PetscFunctionReturn(0); 308 } 309 */ 310 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 311 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 312 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 313 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 314 315 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 316 ierr = PetscMalloc1((n+1),&bi);CHKERRQ(ierr); 317 ierr = PetscMalloc1((n+1),&bdiag);CHKERRQ(ierr); 318 bi[0] = bdiag[0] = 0; 319 320 /* linked list for storing column indices of the active row */ 321 nlnk = n + 1; 322 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 323 324 ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr); 325 326 /* initial FreeSpace size is f*(ai[n]+1) */ 327 f = info->fill; 328 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 329 current_space = free_space; 330 331 for (i=0; i<n; i++) { 332 /* copy previous fill into linked list */ 333 nzi = 0; 334 nnz = ai[r[i]+1] - ai[r[i]]; 335 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 336 ajtmp = aj + ai[r[i]]; 337 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 338 nzi += nlnk; 339 340 /* add pivot rows into linked list */ 341 row = lnk[n]; 342 while (row < i) { 343 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 344 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 345 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 346 nzi += nlnk; 347 row = lnk[row]; 348 } 349 bi[i+1] = bi[i] + nzi; 350 im[i] = nzi; 351 352 /* mark bdiag */ 353 nzbd = 0; 354 nnz = nzi; 355 k = lnk[n]; 356 while (nnz-- && k < i) { 357 nzbd++; 358 k = lnk[k]; 359 } 360 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 361 362 /* if free space is not available, make more free space */ 363 if (current_space->local_remaining<nzi) { 364 nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */ 365 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 366 reallocs++; 367 } 368 369 /* copy data into free space, then initialize lnk */ 370 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 371 372 bi_ptr[i] = current_space->array; 373 current_space->array += nzi; 374 current_space->local_used += nzi; 375 current_space->local_remaining -= nzi; 376 } 377 378 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 379 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 380 381 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 382 ierr = PetscMalloc1((bi[n]+1),&bj);CHKERRQ(ierr); 383 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 384 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 385 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 386 387 /* put together the new matrix */ 388 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 389 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 390 b = (Mat_SeqAIJ*)(B)->data; 391 392 b->free_a = PETSC_TRUE; 393 b->free_ij = PETSC_TRUE; 394 b->singlemalloc = PETSC_FALSE; 395 396 ierr = PetscMalloc1((bdiag[0]+1),&b->a);CHKERRQ(ierr); 397 398 b->j = bj; 399 b->i = bi; 400 b->diag = bdiag; 401 b->ilen = 0; 402 b->imax = 0; 403 b->row = isrow; 404 b->col = iscol; 405 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 406 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 407 b->icol = isicol; 408 ierr = PetscMalloc1((n+1),&b->solve_work);CHKERRQ(ierr); 409 410 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 411 ierr = PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 412 b->maxnz = b->nz = bdiag[0]+1; 413 414 B->factortype = MAT_FACTOR_LU; 415 B->info.factor_mallocs = reallocs; 416 B->info.fill_ratio_given = f; 417 418 if (ai[n]) { 419 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 420 } else { 421 B->info.fill_ratio_needed = 0.0; 422 } 423 #if defined(PETSC_USE_INFO) 424 if (ai[n] != 0) { 425 PetscReal af = B->info.fill_ratio_needed; 426 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 427 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 428 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); 429 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 430 } else { 431 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 432 } 433 #endif 434 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 435 if (a->inode.size) { 436 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 437 } 438 ierr = Mat_CheckInode_FactorLU(B);CHKERRQ(ierr); 439 PetscFunctionReturn(0); 440 } 441 442 /* 443 Trouble in factorization, should we dump the original matrix? 444 */ 445 #undef __FUNCT__ 446 #define __FUNCT__ "MatFactorDumpMatrix" 447 PetscErrorCode MatFactorDumpMatrix(Mat A) 448 { 449 PetscErrorCode ierr; 450 PetscBool flg = PETSC_FALSE; 451 452 PetscFunctionBegin; 453 ierr = PetscOptionsGetBool(NULL,"-mat_factor_dump_on_error",&flg,NULL);CHKERRQ(ierr); 454 if (flg) { 455 PetscViewer viewer; 456 char filename[PETSC_MAX_PATH_LEN]; 457 458 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 459 ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 460 ierr = MatView(A,viewer);CHKERRQ(ierr); 461 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 462 } 463 PetscFunctionReturn(0); 464 } 465 466 #undef __FUNCT__ 467 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 468 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 469 { 470 Mat C =B; 471 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 472 IS isrow = b->row,isicol = b->icol; 473 PetscErrorCode ierr; 474 const PetscInt *r,*ic,*ics; 475 const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag; 476 PetscInt i,j,k,nz,nzL,row,*pj; 477 const PetscInt *ajtmp,*bjtmp; 478 MatScalar *rtmp,*pc,multiplier,*pv; 479 const MatScalar *aa=a->a,*v; 480 PetscBool row_identity,col_identity; 481 FactorShiftCtx sctx; 482 const PetscInt *ddiag; 483 PetscReal rs; 484 MatScalar d; 485 486 PetscFunctionBegin; 487 /* MatPivotSetUp(): initialize shift context sctx */ 488 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 489 490 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 491 ddiag = a->diag; 492 sctx.shift_top = info->zeropivot; 493 for (i=0; i<n; i++) { 494 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 495 d = (aa)[ddiag[i]]; 496 rs = -PetscAbsScalar(d) - PetscRealPart(d); 497 v = aa+ai[i]; 498 nz = ai[i+1] - ai[i]; 499 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 500 if (rs>sctx.shift_top) sctx.shift_top = rs; 501 } 502 sctx.shift_top *= 1.1; 503 sctx.nshift_max = 5; 504 sctx.shift_lo = 0.; 505 sctx.shift_hi = 1.; 506 } 507 508 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 509 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 510 ierr = PetscMalloc1((n+1),&rtmp);CHKERRQ(ierr); 511 ics = ic; 512 513 do { 514 sctx.newshift = PETSC_FALSE; 515 for (i=0; i<n; i++) { 516 /* zero rtmp */ 517 /* L part */ 518 nz = bi[i+1] - bi[i]; 519 bjtmp = bj + bi[i]; 520 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 521 522 /* U part */ 523 nz = bdiag[i]-bdiag[i+1]; 524 bjtmp = bj + bdiag[i+1]+1; 525 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 526 527 /* load in initial (unfactored row) */ 528 nz = ai[r[i]+1] - ai[r[i]]; 529 ajtmp = aj + ai[r[i]]; 530 v = aa + ai[r[i]]; 531 for (j=0; j<nz; j++) { 532 rtmp[ics[ajtmp[j]]] = v[j]; 533 } 534 /* ZeropivotApply() */ 535 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 536 537 /* elimination */ 538 bjtmp = bj + bi[i]; 539 row = *bjtmp++; 540 nzL = bi[i+1] - bi[i]; 541 for (k=0; k < nzL; k++) { 542 pc = rtmp + row; 543 if (*pc != 0.0) { 544 pv = b->a + bdiag[row]; 545 multiplier = *pc * (*pv); 546 *pc = multiplier; 547 548 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 549 pv = b->a + bdiag[row+1]+1; 550 nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */ 551 552 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 553 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 554 } 555 row = *bjtmp++; 556 } 557 558 /* finished row so stick it into b->a */ 559 rs = 0.0; 560 /* L part */ 561 pv = b->a + bi[i]; 562 pj = b->j + bi[i]; 563 nz = bi[i+1] - bi[i]; 564 for (j=0; j<nz; j++) { 565 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 566 } 567 568 /* U part */ 569 pv = b->a + bdiag[i+1]+1; 570 pj = b->j + bdiag[i+1]+1; 571 nz = bdiag[i] - bdiag[i+1]-1; 572 for (j=0; j<nz; j++) { 573 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 574 } 575 576 sctx.rs = rs; 577 sctx.pv = rtmp[i]; 578 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 579 if (sctx.newshift) break; /* break for-loop */ 580 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 581 582 /* Mark diagonal and invert diagonal for simplier triangular solves */ 583 pv = b->a + bdiag[i]; 584 *pv = 1.0/rtmp[i]; 585 586 } /* endof for (i=0; i<n; i++) { */ 587 588 /* MatPivotRefine() */ 589 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 590 /* 591 * if no shift in this attempt & shifting & started shifting & can refine, 592 * then try lower shift 593 */ 594 sctx.shift_hi = sctx.shift_fraction; 595 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 596 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 597 sctx.newshift = PETSC_TRUE; 598 sctx.nshift++; 599 } 600 } while (sctx.newshift); 601 602 ierr = PetscFree(rtmp);CHKERRQ(ierr); 603 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 604 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 605 606 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 607 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 608 if (b->inode.size) { 609 C->ops->solve = MatSolve_SeqAIJ_Inode; 610 } else if (row_identity && col_identity) { 611 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 612 } else { 613 C->ops->solve = MatSolve_SeqAIJ; 614 } 615 C->ops->solveadd = MatSolveAdd_SeqAIJ; 616 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 617 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 618 C->ops->matsolve = MatMatSolve_SeqAIJ; 619 C->assembled = PETSC_TRUE; 620 C->preallocated = PETSC_TRUE; 621 622 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 623 624 /* MatShiftView(A,info,&sctx) */ 625 if (sctx.nshift) { 626 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 627 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 628 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 629 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 630 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 631 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr); 632 } 633 } 634 PetscFunctionReturn(0); 635 } 636 637 #undef __FUNCT__ 638 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace" 639 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 640 { 641 Mat C =B; 642 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 643 IS isrow = b->row,isicol = b->icol; 644 PetscErrorCode ierr; 645 const PetscInt *r,*ic,*ics; 646 PetscInt nz,row,i,j,n=A->rmap->n,diag; 647 const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 648 const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj; 649 MatScalar *pv,*rtmp,*pc,multiplier,d; 650 const MatScalar *v,*aa=a->a; 651 PetscReal rs=0.0; 652 FactorShiftCtx sctx; 653 const PetscInt *ddiag; 654 PetscBool row_identity, col_identity; 655 656 PetscFunctionBegin; 657 /* MatPivotSetUp(): initialize shift context sctx */ 658 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 659 660 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 661 ddiag = a->diag; 662 sctx.shift_top = info->zeropivot; 663 for (i=0; i<n; i++) { 664 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 665 d = (aa)[ddiag[i]]; 666 rs = -PetscAbsScalar(d) - PetscRealPart(d); 667 v = aa+ai[i]; 668 nz = ai[i+1] - ai[i]; 669 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 670 if (rs>sctx.shift_top) sctx.shift_top = rs; 671 } 672 sctx.shift_top *= 1.1; 673 sctx.nshift_max = 5; 674 sctx.shift_lo = 0.; 675 sctx.shift_hi = 1.; 676 } 677 678 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 679 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 680 ierr = PetscMalloc1((n+1),&rtmp);CHKERRQ(ierr); 681 ics = ic; 682 683 do { 684 sctx.newshift = PETSC_FALSE; 685 for (i=0; i<n; i++) { 686 nz = bi[i+1] - bi[i]; 687 bjtmp = bj + bi[i]; 688 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 689 690 /* load in initial (unfactored row) */ 691 nz = ai[r[i]+1] - ai[r[i]]; 692 ajtmp = aj + ai[r[i]]; 693 v = aa + ai[r[i]]; 694 for (j=0; j<nz; j++) { 695 rtmp[ics[ajtmp[j]]] = v[j]; 696 } 697 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 698 699 row = *bjtmp++; 700 while (row < i) { 701 pc = rtmp + row; 702 if (*pc != 0.0) { 703 pv = b->a + diag_offset[row]; 704 pj = b->j + diag_offset[row] + 1; 705 multiplier = *pc / *pv++; 706 *pc = multiplier; 707 nz = bi[row+1] - diag_offset[row] - 1; 708 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 709 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 710 } 711 row = *bjtmp++; 712 } 713 /* finished row so stick it into b->a */ 714 pv = b->a + bi[i]; 715 pj = b->j + bi[i]; 716 nz = bi[i+1] - bi[i]; 717 diag = diag_offset[i] - bi[i]; 718 rs = 0.0; 719 for (j=0; j<nz; j++) { 720 pv[j] = rtmp[pj[j]]; 721 rs += PetscAbsScalar(pv[j]); 722 } 723 rs -= PetscAbsScalar(pv[diag]); 724 725 sctx.rs = rs; 726 sctx.pv = pv[diag]; 727 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 728 if (sctx.newshift) break; 729 pv[diag] = sctx.pv; 730 } 731 732 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 733 /* 734 * if no shift in this attempt & shifting & started shifting & can refine, 735 * then try lower shift 736 */ 737 sctx.shift_hi = sctx.shift_fraction; 738 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 739 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 740 sctx.newshift = PETSC_TRUE; 741 sctx.nshift++; 742 } 743 } while (sctx.newshift); 744 745 /* invert diagonal entries for simplier triangular solves */ 746 for (i=0; i<n; i++) { 747 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 748 } 749 ierr = PetscFree(rtmp);CHKERRQ(ierr); 750 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 751 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 752 753 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 754 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 755 if (row_identity && col_identity) { 756 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 757 } else { 758 C->ops->solve = MatSolve_SeqAIJ_inplace; 759 } 760 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 761 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 762 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 763 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 764 765 C->assembled = PETSC_TRUE; 766 C->preallocated = PETSC_TRUE; 767 768 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 769 if (sctx.nshift) { 770 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 771 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 772 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 773 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 774 } 775 } 776 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 777 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 778 779 ierr = Mat_CheckInode(C,PETSC_FALSE);CHKERRQ(ierr); 780 PetscFunctionReturn(0); 781 } 782 783 /* 784 This routine implements inplace ILU(0) with row or/and column permutations. 785 Input: 786 A - original matrix 787 Output; 788 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 789 a->j (col index) is permuted by the inverse of colperm, then sorted 790 a->a reordered accordingly with a->j 791 a->diag (ptr to diagonal elements) is updated. 792 */ 793 #undef __FUNCT__ 794 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 795 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 796 { 797 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data; 798 IS isrow = a->row,isicol = a->icol; 799 PetscErrorCode ierr; 800 const PetscInt *r,*ic,*ics; 801 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 802 PetscInt *ajtmp,nz,row; 803 PetscInt *diag = a->diag,nbdiag,*pj; 804 PetscScalar *rtmp,*pc,multiplier,d; 805 MatScalar *pv,*v; 806 PetscReal rs; 807 FactorShiftCtx sctx; 808 const MatScalar *aa=a->a,*vtmp; 809 810 PetscFunctionBegin; 811 if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 812 813 /* MatPivotSetUp(): initialize shift context sctx */ 814 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 815 816 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 817 const PetscInt *ddiag = a->diag; 818 sctx.shift_top = info->zeropivot; 819 for (i=0; i<n; i++) { 820 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 821 d = (aa)[ddiag[i]]; 822 rs = -PetscAbsScalar(d) - PetscRealPart(d); 823 vtmp = aa+ai[i]; 824 nz = ai[i+1] - ai[i]; 825 for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]); 826 if (rs>sctx.shift_top) sctx.shift_top = rs; 827 } 828 sctx.shift_top *= 1.1; 829 sctx.nshift_max = 5; 830 sctx.shift_lo = 0.; 831 sctx.shift_hi = 1.; 832 } 833 834 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 835 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 836 ierr = PetscMalloc1((n+1),&rtmp);CHKERRQ(ierr); 837 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 838 ics = ic; 839 840 #if defined(MV) 841 sctx.shift_top = 0.; 842 sctx.nshift_max = 0; 843 sctx.shift_lo = 0.; 844 sctx.shift_hi = 0.; 845 sctx.shift_fraction = 0.; 846 847 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 848 sctx.shift_top = 0.; 849 for (i=0; i<n; i++) { 850 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 851 d = (a->a)[diag[i]]; 852 rs = -PetscAbsScalar(d) - PetscRealPart(d); 853 v = a->a+ai[i]; 854 nz = ai[i+1] - ai[i]; 855 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 856 if (rs>sctx.shift_top) sctx.shift_top = rs; 857 } 858 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 859 sctx.shift_top *= 1.1; 860 sctx.nshift_max = 5; 861 sctx.shift_lo = 0.; 862 sctx.shift_hi = 1.; 863 } 864 865 sctx.shift_amount = 0.; 866 sctx.nshift = 0; 867 #endif 868 869 do { 870 sctx.newshift = PETSC_FALSE; 871 for (i=0; i<n; i++) { 872 /* load in initial unfactored row */ 873 nz = ai[r[i]+1] - ai[r[i]]; 874 ajtmp = aj + ai[r[i]]; 875 v = a->a + ai[r[i]]; 876 /* sort permuted ajtmp and values v accordingly */ 877 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 878 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 879 880 diag[r[i]] = ai[r[i]]; 881 for (j=0; j<nz; j++) { 882 rtmp[ajtmp[j]] = v[j]; 883 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 884 } 885 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 886 887 row = *ajtmp++; 888 while (row < i) { 889 pc = rtmp + row; 890 if (*pc != 0.0) { 891 pv = a->a + diag[r[row]]; 892 pj = aj + diag[r[row]] + 1; 893 894 multiplier = *pc / *pv++; 895 *pc = multiplier; 896 nz = ai[r[row]+1] - diag[r[row]] - 1; 897 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 898 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 899 } 900 row = *ajtmp++; 901 } 902 /* finished row so overwrite it onto a->a */ 903 pv = a->a + ai[r[i]]; 904 pj = aj + ai[r[i]]; 905 nz = ai[r[i]+1] - ai[r[i]]; 906 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 907 908 rs = 0.0; 909 for (j=0; j<nz; j++) { 910 pv[j] = rtmp[pj[j]]; 911 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 912 } 913 914 sctx.rs = rs; 915 sctx.pv = pv[nbdiag]; 916 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 917 if (sctx.newshift) break; 918 pv[nbdiag] = sctx.pv; 919 } 920 921 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 922 /* 923 * if no shift in this attempt & shifting & started shifting & can refine, 924 * then try lower shift 925 */ 926 sctx.shift_hi = sctx.shift_fraction; 927 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 928 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 929 sctx.newshift = PETSC_TRUE; 930 sctx.nshift++; 931 } 932 } while (sctx.newshift); 933 934 /* invert diagonal entries for simplier triangular solves */ 935 for (i=0; i<n; i++) { 936 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 937 } 938 939 ierr = PetscFree(rtmp);CHKERRQ(ierr); 940 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 941 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 942 943 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 944 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 945 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 946 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 947 948 A->assembled = PETSC_TRUE; 949 A->preallocated = PETSC_TRUE; 950 951 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 952 if (sctx.nshift) { 953 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 954 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 955 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 956 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 957 } 958 } 959 PetscFunctionReturn(0); 960 } 961 962 /* ----------------------------------------------------------- */ 963 #undef __FUNCT__ 964 #define __FUNCT__ "MatLUFactor_SeqAIJ" 965 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 966 { 967 PetscErrorCode ierr; 968 Mat C; 969 970 PetscFunctionBegin; 971 ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 972 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 973 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 974 975 A->ops->solve = C->ops->solve; 976 A->ops->solvetranspose = C->ops->solvetranspose; 977 978 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 979 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 980 PetscFunctionReturn(0); 981 } 982 /* ----------------------------------------------------------- */ 983 984 985 #undef __FUNCT__ 986 #define __FUNCT__ "MatSolve_SeqAIJ_inplace" 987 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 988 { 989 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 990 IS iscol = a->col,isrow = a->row; 991 PetscErrorCode ierr; 992 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 993 PetscInt nz; 994 const PetscInt *rout,*cout,*r,*c; 995 PetscScalar *x,*tmp,*tmps,sum; 996 const PetscScalar *b; 997 const MatScalar *aa = a->a,*v; 998 999 PetscFunctionBegin; 1000 if (!n) PetscFunctionReturn(0); 1001 1002 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1003 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1004 tmp = a->solve_work; 1005 1006 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1007 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1008 1009 /* forward solve the lower triangular */ 1010 tmp[0] = b[*r++]; 1011 tmps = tmp; 1012 for (i=1; i<n; i++) { 1013 v = aa + ai[i]; 1014 vi = aj + ai[i]; 1015 nz = a->diag[i] - ai[i]; 1016 sum = b[*r++]; 1017 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1018 tmp[i] = sum; 1019 } 1020 1021 /* backward solve the upper triangular */ 1022 for (i=n-1; i>=0; i--) { 1023 v = aa + a->diag[i] + 1; 1024 vi = aj + a->diag[i] + 1; 1025 nz = ai[i+1] - a->diag[i] - 1; 1026 sum = tmp[i]; 1027 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1028 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 1029 } 1030 1031 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1032 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1033 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1034 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1035 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1036 PetscFunctionReturn(0); 1037 } 1038 1039 #undef __FUNCT__ 1040 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace" 1041 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X) 1042 { 1043 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1044 IS iscol = a->col,isrow = a->row; 1045 PetscErrorCode ierr; 1046 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1047 PetscInt nz,neq; 1048 const PetscInt *rout,*cout,*r,*c; 1049 PetscScalar *x,*b,*tmp,*tmps,sum; 1050 const MatScalar *aa = a->a,*v; 1051 PetscBool bisdense,xisdense; 1052 1053 PetscFunctionBegin; 1054 if (!n) PetscFunctionReturn(0); 1055 1056 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1057 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1058 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1059 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1060 1061 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 1062 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1063 1064 tmp = a->solve_work; 1065 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1066 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1067 1068 for (neq=0; neq<B->cmap->n; neq++) { 1069 /* forward solve the lower triangular */ 1070 tmp[0] = b[r[0]]; 1071 tmps = tmp; 1072 for (i=1; i<n; i++) { 1073 v = aa + ai[i]; 1074 vi = aj + ai[i]; 1075 nz = a->diag[i] - ai[i]; 1076 sum = b[r[i]]; 1077 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1078 tmp[i] = sum; 1079 } 1080 /* backward solve the upper triangular */ 1081 for (i=n-1; i>=0; i--) { 1082 v = aa + a->diag[i] + 1; 1083 vi = aj + a->diag[i] + 1; 1084 nz = ai[i+1] - a->diag[i] - 1; 1085 sum = tmp[i]; 1086 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1087 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 1088 } 1089 1090 b += n; 1091 x += n; 1092 } 1093 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1094 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1095 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 1096 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1097 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatMatSolve_SeqAIJ" 1103 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 1104 { 1105 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1106 IS iscol = a->col,isrow = a->row; 1107 PetscErrorCode ierr; 1108 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1109 PetscInt nz,neq; 1110 const PetscInt *rout,*cout,*r,*c; 1111 PetscScalar *x,*b,*tmp,sum; 1112 const MatScalar *aa = a->a,*v; 1113 PetscBool bisdense,xisdense; 1114 1115 PetscFunctionBegin; 1116 if (!n) PetscFunctionReturn(0); 1117 1118 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1119 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1120 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1121 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1122 1123 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 1124 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1125 1126 tmp = a->solve_work; 1127 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1128 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1129 1130 for (neq=0; neq<B->cmap->n; neq++) { 1131 /* forward solve the lower triangular */ 1132 tmp[0] = b[r[0]]; 1133 v = aa; 1134 vi = aj; 1135 for (i=1; i<n; i++) { 1136 nz = ai[i+1] - ai[i]; 1137 sum = b[r[i]]; 1138 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1139 tmp[i] = sum; 1140 v += nz; vi += nz; 1141 } 1142 1143 /* backward solve the upper triangular */ 1144 for (i=n-1; i>=0; i--) { 1145 v = aa + adiag[i+1]+1; 1146 vi = aj + adiag[i+1]+1; 1147 nz = adiag[i]-adiag[i+1]-1; 1148 sum = tmp[i]; 1149 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1150 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 1151 } 1152 1153 b += n; 1154 x += n; 1155 } 1156 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1157 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1158 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 1159 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1160 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1161 PetscFunctionReturn(0); 1162 } 1163 1164 #undef __FUNCT__ 1165 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 1166 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 1167 { 1168 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1169 IS iscol = a->col,isrow = a->row; 1170 PetscErrorCode ierr; 1171 const PetscInt *r,*c,*rout,*cout; 1172 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1173 PetscInt nz,row; 1174 PetscScalar *x,*b,*tmp,*tmps,sum; 1175 const MatScalar *aa = a->a,*v; 1176 1177 PetscFunctionBegin; 1178 if (!n) PetscFunctionReturn(0); 1179 1180 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1181 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1182 tmp = a->solve_work; 1183 1184 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1185 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1186 1187 /* forward solve the lower triangular */ 1188 tmp[0] = b[*r++]; 1189 tmps = tmp; 1190 for (row=1; row<n; row++) { 1191 i = rout[row]; /* permuted row */ 1192 v = aa + ai[i]; 1193 vi = aj + ai[i]; 1194 nz = a->diag[i] - ai[i]; 1195 sum = b[*r++]; 1196 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1197 tmp[row] = sum; 1198 } 1199 1200 /* backward solve the upper triangular */ 1201 for (row=n-1; row>=0; row--) { 1202 i = rout[row]; /* permuted row */ 1203 v = aa + a->diag[i] + 1; 1204 vi = aj + a->diag[i] + 1; 1205 nz = ai[i+1] - a->diag[i] - 1; 1206 sum = tmp[row]; 1207 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1208 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 1209 } 1210 1211 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1212 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1213 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1214 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1215 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1216 PetscFunctionReturn(0); 1217 } 1218 1219 /* ----------------------------------------------------------- */ 1220 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1221 #undef __FUNCT__ 1222 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace" 1223 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1224 { 1225 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1226 PetscErrorCode ierr; 1227 PetscInt n = A->rmap->n; 1228 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag; 1229 PetscScalar *x; 1230 const PetscScalar *b; 1231 const MatScalar *aa = a->a; 1232 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1233 PetscInt adiag_i,i,nz,ai_i; 1234 const PetscInt *vi; 1235 const MatScalar *v; 1236 PetscScalar sum; 1237 #endif 1238 1239 PetscFunctionBegin; 1240 if (!n) PetscFunctionReturn(0); 1241 1242 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1243 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1244 1245 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1246 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 1247 #else 1248 /* forward solve the lower triangular */ 1249 x[0] = b[0]; 1250 for (i=1; i<n; i++) { 1251 ai_i = ai[i]; 1252 v = aa + ai_i; 1253 vi = aj + ai_i; 1254 nz = adiag[i] - ai_i; 1255 sum = b[i]; 1256 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1257 x[i] = sum; 1258 } 1259 1260 /* backward solve the upper triangular */ 1261 for (i=n-1; i>=0; i--) { 1262 adiag_i = adiag[i]; 1263 v = aa + adiag_i + 1; 1264 vi = aj + adiag_i + 1; 1265 nz = ai[i+1] - adiag_i - 1; 1266 sum = x[i]; 1267 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1268 x[i] = sum*aa[adiag_i]; 1269 } 1270 #endif 1271 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1272 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1273 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1274 PetscFunctionReturn(0); 1275 } 1276 1277 #undef __FUNCT__ 1278 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace" 1279 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx) 1280 { 1281 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1282 IS iscol = a->col,isrow = a->row; 1283 PetscErrorCode ierr; 1284 PetscInt i, n = A->rmap->n,j; 1285 PetscInt nz; 1286 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j; 1287 PetscScalar *x,*tmp,sum; 1288 const PetscScalar *b; 1289 const MatScalar *aa = a->a,*v; 1290 1291 PetscFunctionBegin; 1292 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1293 1294 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1295 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1296 tmp = a->solve_work; 1297 1298 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1299 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1300 1301 /* forward solve the lower triangular */ 1302 tmp[0] = b[*r++]; 1303 for (i=1; i<n; i++) { 1304 v = aa + ai[i]; 1305 vi = aj + ai[i]; 1306 nz = a->diag[i] - ai[i]; 1307 sum = b[*r++]; 1308 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1309 tmp[i] = sum; 1310 } 1311 1312 /* backward solve the upper triangular */ 1313 for (i=n-1; i>=0; i--) { 1314 v = aa + a->diag[i] + 1; 1315 vi = aj + a->diag[i] + 1; 1316 nz = ai[i+1] - a->diag[i] - 1; 1317 sum = tmp[i]; 1318 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1319 tmp[i] = sum*aa[a->diag[i]]; 1320 x[*c--] += tmp[i]; 1321 } 1322 1323 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1324 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1325 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1326 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1327 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1328 PetscFunctionReturn(0); 1329 } 1330 1331 #undef __FUNCT__ 1332 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1333 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1334 { 1335 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1336 IS iscol = a->col,isrow = a->row; 1337 PetscErrorCode ierr; 1338 PetscInt i, n = A->rmap->n,j; 1339 PetscInt nz; 1340 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1341 PetscScalar *x,*tmp,sum; 1342 const PetscScalar *b; 1343 const MatScalar *aa = a->a,*v; 1344 1345 PetscFunctionBegin; 1346 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1347 1348 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1349 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1350 tmp = a->solve_work; 1351 1352 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1353 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1354 1355 /* forward solve the lower triangular */ 1356 tmp[0] = b[r[0]]; 1357 v = aa; 1358 vi = aj; 1359 for (i=1; i<n; i++) { 1360 nz = ai[i+1] - ai[i]; 1361 sum = b[r[i]]; 1362 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1363 tmp[i] = sum; 1364 v += nz; 1365 vi += nz; 1366 } 1367 1368 /* backward solve the upper triangular */ 1369 v = aa + adiag[n-1]; 1370 vi = aj + adiag[n-1]; 1371 for (i=n-1; i>=0; i--) { 1372 nz = adiag[i] - adiag[i+1] - 1; 1373 sum = tmp[i]; 1374 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1375 tmp[i] = sum*v[nz]; 1376 x[c[i]] += tmp[i]; 1377 v += nz+1; vi += nz+1; 1378 } 1379 1380 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1381 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1382 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1383 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1384 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1385 PetscFunctionReturn(0); 1386 } 1387 1388 #undef __FUNCT__ 1389 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace" 1390 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 1391 { 1392 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1393 IS iscol = a->col,isrow = a->row; 1394 PetscErrorCode ierr; 1395 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1396 PetscInt i,n = A->rmap->n,j; 1397 PetscInt nz; 1398 PetscScalar *x,*tmp,s1; 1399 const MatScalar *aa = a->a,*v; 1400 const PetscScalar *b; 1401 1402 PetscFunctionBegin; 1403 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1404 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1405 tmp = a->solve_work; 1406 1407 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1408 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1409 1410 /* copy the b into temp work space according to permutation */ 1411 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1412 1413 /* forward solve the U^T */ 1414 for (i=0; i<n; i++) { 1415 v = aa + diag[i]; 1416 vi = aj + diag[i] + 1; 1417 nz = ai[i+1] - diag[i] - 1; 1418 s1 = tmp[i]; 1419 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1420 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1421 tmp[i] = s1; 1422 } 1423 1424 /* backward solve the L^T */ 1425 for (i=n-1; i>=0; i--) { 1426 v = aa + diag[i] - 1; 1427 vi = aj + diag[i] - 1; 1428 nz = diag[i] - ai[i]; 1429 s1 = tmp[i]; 1430 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1431 } 1432 1433 /* copy tmp into x according to permutation */ 1434 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1435 1436 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1437 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1438 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1439 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1440 1441 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1442 PetscFunctionReturn(0); 1443 } 1444 1445 #undef __FUNCT__ 1446 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1447 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1448 { 1449 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1450 IS iscol = a->col,isrow = a->row; 1451 PetscErrorCode ierr; 1452 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1453 PetscInt i,n = A->rmap->n,j; 1454 PetscInt nz; 1455 PetscScalar *x,*tmp,s1; 1456 const MatScalar *aa = a->a,*v; 1457 const PetscScalar *b; 1458 1459 PetscFunctionBegin; 1460 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1461 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1462 tmp = a->solve_work; 1463 1464 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1465 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1466 1467 /* copy the b into temp work space according to permutation */ 1468 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1469 1470 /* forward solve the U^T */ 1471 for (i=0; i<n; i++) { 1472 v = aa + adiag[i+1] + 1; 1473 vi = aj + adiag[i+1] + 1; 1474 nz = adiag[i] - adiag[i+1] - 1; 1475 s1 = tmp[i]; 1476 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1477 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1478 tmp[i] = s1; 1479 } 1480 1481 /* backward solve the L^T */ 1482 for (i=n-1; i>=0; i--) { 1483 v = aa + ai[i]; 1484 vi = aj + ai[i]; 1485 nz = ai[i+1] - ai[i]; 1486 s1 = tmp[i]; 1487 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1488 } 1489 1490 /* copy tmp into x according to permutation */ 1491 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1492 1493 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1494 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1495 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1496 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1497 1498 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1499 PetscFunctionReturn(0); 1500 } 1501 1502 #undef __FUNCT__ 1503 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace" 1504 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx) 1505 { 1506 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1507 IS iscol = a->col,isrow = a->row; 1508 PetscErrorCode ierr; 1509 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1510 PetscInt i,n = A->rmap->n,j; 1511 PetscInt nz; 1512 PetscScalar *x,*tmp,s1; 1513 const MatScalar *aa = a->a,*v; 1514 const PetscScalar *b; 1515 1516 PetscFunctionBegin; 1517 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1518 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1519 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1520 tmp = a->solve_work; 1521 1522 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1523 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1524 1525 /* copy the b into temp work space according to permutation */ 1526 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1527 1528 /* forward solve the U^T */ 1529 for (i=0; i<n; i++) { 1530 v = aa + diag[i]; 1531 vi = aj + diag[i] + 1; 1532 nz = ai[i+1] - diag[i] - 1; 1533 s1 = tmp[i]; 1534 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1535 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1536 tmp[i] = s1; 1537 } 1538 1539 /* backward solve the L^T */ 1540 for (i=n-1; i>=0; i--) { 1541 v = aa + diag[i] - 1; 1542 vi = aj + diag[i] - 1; 1543 nz = diag[i] - ai[i]; 1544 s1 = tmp[i]; 1545 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1546 } 1547 1548 /* copy tmp into x according to permutation */ 1549 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1550 1551 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1552 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1553 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1554 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1555 1556 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1557 PetscFunctionReturn(0); 1558 } 1559 1560 #undef __FUNCT__ 1561 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1562 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1563 { 1564 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1565 IS iscol = a->col,isrow = a->row; 1566 PetscErrorCode ierr; 1567 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1568 PetscInt i,n = A->rmap->n,j; 1569 PetscInt nz; 1570 PetscScalar *x,*tmp,s1; 1571 const MatScalar *aa = a->a,*v; 1572 const PetscScalar *b; 1573 1574 PetscFunctionBegin; 1575 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1576 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1577 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1578 tmp = a->solve_work; 1579 1580 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1581 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1582 1583 /* copy the b into temp work space according to permutation */ 1584 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1585 1586 /* forward solve the U^T */ 1587 for (i=0; i<n; i++) { 1588 v = aa + adiag[i+1] + 1; 1589 vi = aj + adiag[i+1] + 1; 1590 nz = adiag[i] - adiag[i+1] - 1; 1591 s1 = tmp[i]; 1592 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1593 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1594 tmp[i] = s1; 1595 } 1596 1597 1598 /* backward solve the L^T */ 1599 for (i=n-1; i>=0; i--) { 1600 v = aa + ai[i]; 1601 vi = aj + ai[i]; 1602 nz = ai[i+1] - ai[i]; 1603 s1 = tmp[i]; 1604 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1605 } 1606 1607 /* copy tmp into x according to permutation */ 1608 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1609 1610 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1611 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1612 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1613 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1614 1615 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1616 PetscFunctionReturn(0); 1617 } 1618 1619 /* ----------------------------------------------------------------*/ 1620 1621 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool); 1622 1623 /* 1624 ilu() under revised new data structure. 1625 Factored arrays bj and ba are stored as 1626 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1627 1628 bi=fact->i is an array of size n+1, in which 1629 bi+ 1630 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1631 bi[n]: points to L(n-1,n-1)+1 1632 1633 bdiag=fact->diag is an array of size n+1,in which 1634 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1635 bdiag[n]: points to entry of U(n-1,0)-1 1636 1637 U(i,:) contains bdiag[i] as its last entry, i.e., 1638 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1639 */ 1640 #undef __FUNCT__ 1641 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0" 1642 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1643 { 1644 1645 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1646 PetscErrorCode ierr; 1647 const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1648 PetscInt i,j,k=0,nz,*bi,*bj,*bdiag; 1649 PetscBool missing; 1650 IS isicol; 1651 1652 PetscFunctionBegin; 1653 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1654 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1655 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1656 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1657 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 1658 b = (Mat_SeqAIJ*)(fact)->data; 1659 1660 /* allocate matrix arrays for new data structure */ 1661 ierr = PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);CHKERRQ(ierr); 1662 ierr = PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1663 1664 b->singlemalloc = PETSC_TRUE; 1665 if (!b->diag) { 1666 ierr = PetscMalloc1((n+1),&b->diag);CHKERRQ(ierr); 1667 ierr = PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1668 } 1669 bdiag = b->diag; 1670 1671 if (n > 0) { 1672 ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr); 1673 } 1674 1675 /* set bi and bj with new data structure */ 1676 bi = b->i; 1677 bj = b->j; 1678 1679 /* L part */ 1680 bi[0] = 0; 1681 for (i=0; i<n; i++) { 1682 nz = adiag[i] - ai[i]; 1683 bi[i+1] = bi[i] + nz; 1684 aj = a->j + ai[i]; 1685 for (j=0; j<nz; j++) { 1686 /* *bj = aj[j]; bj++; */ 1687 bj[k++] = aj[j]; 1688 } 1689 } 1690 1691 /* U part */ 1692 bdiag[n] = bi[n]-1; 1693 for (i=n-1; i>=0; i--) { 1694 nz = ai[i+1] - adiag[i] - 1; 1695 aj = a->j + adiag[i] + 1; 1696 for (j=0; j<nz; j++) { 1697 /* *bj = aj[j]; bj++; */ 1698 bj[k++] = aj[j]; 1699 } 1700 /* diag[i] */ 1701 /* *bj = i; bj++; */ 1702 bj[k++] = i; 1703 bdiag[i] = bdiag[i+1] + nz + 1; 1704 } 1705 1706 fact->factortype = MAT_FACTOR_ILU; 1707 fact->info.factor_mallocs = 0; 1708 fact->info.fill_ratio_given = info->fill; 1709 fact->info.fill_ratio_needed = 1.0; 1710 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1711 ierr = Mat_CheckInode_FactorLU(fact);CHKERRQ(ierr); 1712 1713 b = (Mat_SeqAIJ*)(fact)->data; 1714 b->row = isrow; 1715 b->col = iscol; 1716 b->icol = isicol; 1717 ierr = PetscMalloc1((fact->rmap->n+1),&b->solve_work);CHKERRQ(ierr); 1718 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1719 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1720 PetscFunctionReturn(0); 1721 } 1722 1723 #undef __FUNCT__ 1724 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1725 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1726 { 1727 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1728 IS isicol; 1729 PetscErrorCode ierr; 1730 const PetscInt *r,*ic; 1731 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1732 PetscInt *bi,*cols,nnz,*cols_lvl; 1733 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1734 PetscInt i,levels,diagonal_fill; 1735 PetscBool col_identity,row_identity; 1736 PetscReal f; 1737 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 1738 PetscBT lnkbt; 1739 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1740 PetscFreeSpaceList free_space =NULL,current_space=NULL; 1741 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 1742 1743 PetscFunctionBegin; 1744 /* Uncomment the old data struct part only while testing new data structure for MatSolve() */ 1745 /* 1746 PetscBool olddatastruct=PETSC_FALSE; 1747 ierr = PetscOptionsGetBool(NULL,"-ilu_old",&olddatastruct,NULL);CHKERRQ(ierr); 1748 if (olddatastruct) { 1749 ierr = MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1750 PetscFunctionReturn(0); 1751 } 1752 */ 1753 1754 levels = (PetscInt)info->levels; 1755 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1756 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1757 if (!levels && row_identity && col_identity) { 1758 /* special case: ilu(0) with natural ordering */ 1759 ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1760 if (a->inode.size) { 1761 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1762 } 1763 PetscFunctionReturn(0); 1764 } 1765 1766 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1767 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1768 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1769 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1770 1771 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1772 ierr = PetscMalloc1((n+1),&bi);CHKERRQ(ierr); 1773 ierr = PetscMalloc1((n+1),&bdiag);CHKERRQ(ierr); 1774 bi[0] = bdiag[0] = 0; 1775 ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr); 1776 1777 /* create a linked list for storing column indices of the active row */ 1778 nlnk = n + 1; 1779 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1780 1781 /* initial FreeSpace size is f*(ai[n]+1) */ 1782 f = info->fill; 1783 diagonal_fill = (PetscInt)info->diagonal_fill; 1784 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1785 current_space = free_space; 1786 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1787 current_space_lvl = free_space_lvl; 1788 for (i=0; i<n; i++) { 1789 nzi = 0; 1790 /* copy current row into linked list */ 1791 nnz = ai[r[i]+1] - ai[r[i]]; 1792 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1793 cols = aj + ai[r[i]]; 1794 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1795 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1796 nzi += nlnk; 1797 1798 /* make sure diagonal entry is included */ 1799 if (diagonal_fill && lnk[i] == -1) { 1800 fm = n; 1801 while (lnk[fm] < i) fm = lnk[fm]; 1802 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1803 lnk[fm] = i; 1804 lnk_lvl[i] = 0; 1805 nzi++; dcount++; 1806 } 1807 1808 /* add pivot rows into the active row */ 1809 nzbd = 0; 1810 prow = lnk[n]; 1811 while (prow < i) { 1812 nnz = bdiag[prow]; 1813 cols = bj_ptr[prow] + nnz + 1; 1814 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1815 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1816 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1817 nzi += nlnk; 1818 prow = lnk[prow]; 1819 nzbd++; 1820 } 1821 bdiag[i] = nzbd; 1822 bi[i+1] = bi[i] + nzi; 1823 /* if free space is not available, make more free space */ 1824 if (current_space->local_remaining<nzi) { 1825 nnz = 2*nzi*(n - i); /* estimated and max additional space needed */ 1826 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1827 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1828 reallocs++; 1829 } 1830 1831 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1832 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1833 bj_ptr[i] = current_space->array; 1834 bjlvl_ptr[i] = current_space_lvl->array; 1835 1836 /* make sure the active row i has diagonal entry */ 1837 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1838 1839 current_space->array += nzi; 1840 current_space->local_used += nzi; 1841 current_space->local_remaining -= nzi; 1842 current_space_lvl->array += nzi; 1843 current_space_lvl->local_used += nzi; 1844 current_space_lvl->local_remaining -= nzi; 1845 } 1846 1847 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1848 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1849 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1850 ierr = PetscMalloc1((bi[n]+1),&bj);CHKERRQ(ierr); 1851 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 1852 1853 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1854 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1855 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 1856 1857 #if defined(PETSC_USE_INFO) 1858 { 1859 PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1860 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 1861 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 1862 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr); 1863 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1864 if (diagonal_fill) { 1865 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1866 } 1867 } 1868 #endif 1869 /* put together the new matrix */ 1870 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 1871 ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr); 1872 b = (Mat_SeqAIJ*)(fact)->data; 1873 1874 b->free_a = PETSC_TRUE; 1875 b->free_ij = PETSC_TRUE; 1876 b->singlemalloc = PETSC_FALSE; 1877 1878 ierr = PetscMalloc1((bdiag[0]+1),&b->a);CHKERRQ(ierr); 1879 1880 b->j = bj; 1881 b->i = bi; 1882 b->diag = bdiag; 1883 b->ilen = 0; 1884 b->imax = 0; 1885 b->row = isrow; 1886 b->col = iscol; 1887 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1888 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1889 b->icol = isicol; 1890 1891 ierr = PetscMalloc1((n+1),&b->solve_work);CHKERRQ(ierr); 1892 /* In b structure: Free imax, ilen, old a, old j. 1893 Allocate bdiag, solve_work, new a, new j */ 1894 ierr = PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1895 b->maxnz = b->nz = bdiag[0]+1; 1896 1897 (fact)->info.factor_mallocs = reallocs; 1898 (fact)->info.fill_ratio_given = f; 1899 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1900 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1901 if (a->inode.size) { 1902 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1903 } 1904 ierr = Mat_CheckInode_FactorLU(fact);CHKERRQ(ierr); 1905 PetscFunctionReturn(0); 1906 } 1907 1908 #undef __FUNCT__ 1909 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace" 1910 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1911 { 1912 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1913 IS isicol; 1914 PetscErrorCode ierr; 1915 const PetscInt *r,*ic; 1916 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1917 PetscInt *bi,*cols,nnz,*cols_lvl; 1918 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1919 PetscInt i,levels,diagonal_fill; 1920 PetscBool col_identity,row_identity; 1921 PetscReal f; 1922 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 1923 PetscBT lnkbt; 1924 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1925 PetscFreeSpaceList free_space =NULL,current_space=NULL; 1926 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 1927 PetscBool missing; 1928 1929 PetscFunctionBegin; 1930 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1931 f = info->fill; 1932 levels = (PetscInt)info->levels; 1933 diagonal_fill = (PetscInt)info->diagonal_fill; 1934 1935 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1936 1937 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1938 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1939 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1940 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 1941 1942 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1943 if (a->inode.size) { 1944 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1945 } 1946 fact->factortype = MAT_FACTOR_ILU; 1947 (fact)->info.factor_mallocs = 0; 1948 (fact)->info.fill_ratio_given = info->fill; 1949 (fact)->info.fill_ratio_needed = 1.0; 1950 1951 b = (Mat_SeqAIJ*)(fact)->data; 1952 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1953 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1954 b->row = isrow; 1955 b->col = iscol; 1956 b->icol = isicol; 1957 ierr = PetscMalloc1(((fact)->rmap->n+1),&b->solve_work);CHKERRQ(ierr); 1958 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1959 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1960 PetscFunctionReturn(0); 1961 } 1962 1963 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1964 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1965 1966 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1967 ierr = PetscMalloc1((n+1),&bi);CHKERRQ(ierr); 1968 ierr = PetscMalloc1((n+1),&bdiag);CHKERRQ(ierr); 1969 bi[0] = bdiag[0] = 0; 1970 1971 ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr); 1972 1973 /* create a linked list for storing column indices of the active row */ 1974 nlnk = n + 1; 1975 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1976 1977 /* initial FreeSpace size is f*(ai[n]+1) */ 1978 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1979 current_space = free_space; 1980 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1981 current_space_lvl = free_space_lvl; 1982 1983 for (i=0; i<n; i++) { 1984 nzi = 0; 1985 /* copy current row into linked list */ 1986 nnz = ai[r[i]+1] - ai[r[i]]; 1987 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1988 cols = aj + ai[r[i]]; 1989 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1990 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1991 nzi += nlnk; 1992 1993 /* make sure diagonal entry is included */ 1994 if (diagonal_fill && lnk[i] == -1) { 1995 fm = n; 1996 while (lnk[fm] < i) fm = lnk[fm]; 1997 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1998 lnk[fm] = i; 1999 lnk_lvl[i] = 0; 2000 nzi++; dcount++; 2001 } 2002 2003 /* add pivot rows into the active row */ 2004 nzbd = 0; 2005 prow = lnk[n]; 2006 while (prow < i) { 2007 nnz = bdiag[prow]; 2008 cols = bj_ptr[prow] + nnz + 1; 2009 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 2010 nnz = bi[prow+1] - bi[prow] - nnz - 1; 2011 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 2012 nzi += nlnk; 2013 prow = lnk[prow]; 2014 nzbd++; 2015 } 2016 bdiag[i] = nzbd; 2017 bi[i+1] = bi[i] + nzi; 2018 2019 /* if free space is not available, make more free space */ 2020 if (current_space->local_remaining<nzi) { 2021 nnz = nzi*(n - i); /* estimated and max additional space needed */ 2022 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 2023 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 2024 reallocs++; 2025 } 2026 2027 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2028 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2029 bj_ptr[i] = current_space->array; 2030 bjlvl_ptr[i] = current_space_lvl->array; 2031 2032 /* make sure the active row i has diagonal entry */ 2033 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 2034 2035 current_space->array += nzi; 2036 current_space->local_used += nzi; 2037 current_space->local_remaining -= nzi; 2038 current_space_lvl->array += nzi; 2039 current_space_lvl->local_used += nzi; 2040 current_space_lvl->local_remaining -= nzi; 2041 } 2042 2043 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2044 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2045 2046 /* destroy list of free space and other temporary arrays */ 2047 ierr = PetscMalloc1((bi[n]+1),&bj);CHKERRQ(ierr); 2048 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */ 2049 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2050 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2051 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 2052 2053 #if defined(PETSC_USE_INFO) 2054 { 2055 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2056 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 2057 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2058 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr); 2059 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2060 if (diagonal_fill) { 2061 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 2062 } 2063 } 2064 #endif 2065 2066 /* put together the new matrix */ 2067 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 2068 ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr); 2069 b = (Mat_SeqAIJ*)(fact)->data; 2070 2071 b->free_a = PETSC_TRUE; 2072 b->free_ij = PETSC_TRUE; 2073 b->singlemalloc = PETSC_FALSE; 2074 2075 ierr = PetscMalloc1(bi[n],&b->a);CHKERRQ(ierr); 2076 b->j = bj; 2077 b->i = bi; 2078 for (i=0; i<n; i++) bdiag[i] += bi[i]; 2079 b->diag = bdiag; 2080 b->ilen = 0; 2081 b->imax = 0; 2082 b->row = isrow; 2083 b->col = iscol; 2084 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2085 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2086 b->icol = isicol; 2087 ierr = PetscMalloc1((n+1),&b->solve_work);CHKERRQ(ierr); 2088 /* In b structure: Free imax, ilen, old a, old j. 2089 Allocate bdiag, solve_work, new a, new j */ 2090 ierr = PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 2091 b->maxnz = b->nz = bi[n]; 2092 2093 (fact)->info.factor_mallocs = reallocs; 2094 (fact)->info.fill_ratio_given = f; 2095 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2096 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2097 if (a->inode.size) { 2098 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2099 } 2100 PetscFunctionReturn(0); 2101 } 2102 2103 #undef __FUNCT__ 2104 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 2105 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 2106 { 2107 Mat C = B; 2108 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2109 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2110 IS ip=b->row,iip = b->icol; 2111 PetscErrorCode ierr; 2112 const PetscInt *rip,*riip; 2113 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 2114 PetscInt *ai=a->i,*aj=a->j; 2115 PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 2116 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2117 PetscBool perm_identity; 2118 FactorShiftCtx sctx; 2119 PetscReal rs; 2120 MatScalar d,*v; 2121 2122 PetscFunctionBegin; 2123 /* MatPivotSetUp(): initialize shift context sctx */ 2124 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2125 2126 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2127 sctx.shift_top = info->zeropivot; 2128 for (i=0; i<mbs; i++) { 2129 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2130 d = (aa)[a->diag[i]]; 2131 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2132 v = aa+ai[i]; 2133 nz = ai[i+1] - ai[i]; 2134 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 2135 if (rs>sctx.shift_top) sctx.shift_top = rs; 2136 } 2137 sctx.shift_top *= 1.1; 2138 sctx.nshift_max = 5; 2139 sctx.shift_lo = 0.; 2140 sctx.shift_hi = 1.; 2141 } 2142 2143 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2144 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2145 2146 /* allocate working arrays 2147 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2148 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2149 */ 2150 ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);CHKERRQ(ierr); 2151 2152 do { 2153 sctx.newshift = PETSC_FALSE; 2154 2155 for (i=0; i<mbs; i++) c2r[i] = mbs; 2156 if (mbs) il[0] = 0; 2157 2158 for (k = 0; k<mbs; k++) { 2159 /* zero rtmp */ 2160 nz = bi[k+1] - bi[k]; 2161 bjtmp = bj + bi[k]; 2162 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2163 2164 /* load in initial unfactored row */ 2165 bval = ba + bi[k]; 2166 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2167 for (j = jmin; j < jmax; j++) { 2168 col = riip[aj[j]]; 2169 if (col >= k) { /* only take upper triangular entry */ 2170 rtmp[col] = aa[j]; 2171 *bval++ = 0.0; /* for in-place factorization */ 2172 } 2173 } 2174 /* shift the diagonal of the matrix: ZeropivotApply() */ 2175 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2176 2177 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2178 dk = rtmp[k]; 2179 i = c2r[k]; /* first row to be added to k_th row */ 2180 2181 while (i < k) { 2182 nexti = c2r[i]; /* next row to be added to k_th row */ 2183 2184 /* compute multiplier, update diag(k) and U(i,k) */ 2185 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2186 uikdi = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 2187 dk += uikdi*ba[ili]; /* update diag[k] */ 2188 ba[ili] = uikdi; /* -U(i,k) */ 2189 2190 /* add multiple of row i to k-th row */ 2191 jmin = ili + 1; jmax = bi[i+1]; 2192 if (jmin < jmax) { 2193 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2194 /* update il and c2r for row i */ 2195 il[i] = jmin; 2196 j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 2197 } 2198 i = nexti; 2199 } 2200 2201 /* copy data into U(k,:) */ 2202 rs = 0.0; 2203 jmin = bi[k]; jmax = bi[k+1]-1; 2204 if (jmin < jmax) { 2205 for (j=jmin; j<jmax; j++) { 2206 col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]); 2207 } 2208 /* add the k-th row into il and c2r */ 2209 il[k] = jmin; 2210 i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 2211 } 2212 2213 /* MatPivotCheck() */ 2214 sctx.rs = rs; 2215 sctx.pv = dk; 2216 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 2217 if (sctx.newshift) break; 2218 dk = sctx.pv; 2219 2220 ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 2221 } 2222 } while (sctx.newshift); 2223 2224 ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr); 2225 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2226 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2227 2228 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2229 if (perm_identity) { 2230 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2231 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2232 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2233 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2234 } else { 2235 B->ops->solve = MatSolve_SeqSBAIJ_1; 2236 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2237 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2238 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2239 } 2240 2241 C->assembled = PETSC_TRUE; 2242 C->preallocated = PETSC_TRUE; 2243 2244 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2245 2246 /* MatPivotView() */ 2247 if (sctx.nshift) { 2248 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2249 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 2250 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2251 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2252 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 2253 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr); 2254 } 2255 } 2256 PetscFunctionReturn(0); 2257 } 2258 2259 #undef __FUNCT__ 2260 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace" 2261 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 2262 { 2263 Mat C = B; 2264 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2265 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2266 IS ip=b->row,iip = b->icol; 2267 PetscErrorCode ierr; 2268 const PetscInt *rip,*riip; 2269 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp; 2270 PetscInt *ai=a->i,*aj=a->j; 2271 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 2272 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2273 PetscBool perm_identity; 2274 FactorShiftCtx sctx; 2275 PetscReal rs; 2276 MatScalar d,*v; 2277 2278 PetscFunctionBegin; 2279 /* MatPivotSetUp(): initialize shift context sctx */ 2280 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2281 2282 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2283 sctx.shift_top = info->zeropivot; 2284 for (i=0; i<mbs; i++) { 2285 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2286 d = (aa)[a->diag[i]]; 2287 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2288 v = aa+ai[i]; 2289 nz = ai[i+1] - ai[i]; 2290 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 2291 if (rs>sctx.shift_top) sctx.shift_top = rs; 2292 } 2293 sctx.shift_top *= 1.1; 2294 sctx.nshift_max = 5; 2295 sctx.shift_lo = 0.; 2296 sctx.shift_hi = 1.; 2297 } 2298 2299 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2300 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2301 2302 /* initialization */ 2303 ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);CHKERRQ(ierr); 2304 2305 do { 2306 sctx.newshift = PETSC_FALSE; 2307 2308 for (i=0; i<mbs; i++) jl[i] = mbs; 2309 il[0] = 0; 2310 2311 for (k = 0; k<mbs; k++) { 2312 /* zero rtmp */ 2313 nz = bi[k+1] - bi[k]; 2314 bjtmp = bj + bi[k]; 2315 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2316 2317 bval = ba + bi[k]; 2318 /* initialize k-th row by the perm[k]-th row of A */ 2319 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2320 for (j = jmin; j < jmax; j++) { 2321 col = riip[aj[j]]; 2322 if (col >= k) { /* only take upper triangular entry */ 2323 rtmp[col] = aa[j]; 2324 *bval++ = 0.0; /* for in-place factorization */ 2325 } 2326 } 2327 /* shift the diagonal of the matrix */ 2328 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2329 2330 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2331 dk = rtmp[k]; 2332 i = jl[k]; /* first row to be added to k_th row */ 2333 2334 while (i < k) { 2335 nexti = jl[i]; /* next row to be added to k_th row */ 2336 2337 /* compute multiplier, update diag(k) and U(i,k) */ 2338 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2339 uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */ 2340 dk += uikdi*ba[ili]; 2341 ba[ili] = uikdi; /* -U(i,k) */ 2342 2343 /* add multiple of row i to k-th row */ 2344 jmin = ili + 1; jmax = bi[i+1]; 2345 if (jmin < jmax) { 2346 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2347 /* update il and jl for row i */ 2348 il[i] = jmin; 2349 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 2350 } 2351 i = nexti; 2352 } 2353 2354 /* shift the diagonals when zero pivot is detected */ 2355 /* compute rs=sum of abs(off-diagonal) */ 2356 rs = 0.0; 2357 jmin = bi[k]+1; 2358 nz = bi[k+1] - jmin; 2359 bcol = bj + jmin; 2360 for (j=0; j<nz; j++) { 2361 rs += PetscAbsScalar(rtmp[bcol[j]]); 2362 } 2363 2364 sctx.rs = rs; 2365 sctx.pv = dk; 2366 ierr = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr); 2367 if (sctx.newshift) break; 2368 dk = sctx.pv; 2369 2370 /* copy data into U(k,:) */ 2371 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 2372 jmin = bi[k]+1; jmax = bi[k+1]; 2373 if (jmin < jmax) { 2374 for (j=jmin; j<jmax; j++) { 2375 col = bj[j]; ba[j] = rtmp[col]; 2376 } 2377 /* add the k-th row into il and jl */ 2378 il[k] = jmin; 2379 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 2380 } 2381 } 2382 } while (sctx.newshift); 2383 2384 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 2385 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2386 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2387 2388 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2389 if (perm_identity) { 2390 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2391 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2392 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2393 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2394 } else { 2395 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2396 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2397 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2398 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2399 } 2400 2401 C->assembled = PETSC_TRUE; 2402 C->preallocated = PETSC_TRUE; 2403 2404 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2405 if (sctx.nshift) { 2406 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2407 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2408 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2409 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2410 } 2411 } 2412 PetscFunctionReturn(0); 2413 } 2414 2415 /* 2416 icc() under revised new data structure. 2417 Factored arrays bj and ba are stored as 2418 U(0,:),...,U(i,:),U(n-1,:) 2419 2420 ui=fact->i is an array of size n+1, in which 2421 ui+ 2422 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2423 ui[n]: points to U(n-1,n-1)+1 2424 2425 udiag=fact->diag is an array of size n,in which 2426 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2427 2428 U(i,:) contains udiag[i] as its last entry, i.e., 2429 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2430 */ 2431 2432 #undef __FUNCT__ 2433 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 2434 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2435 { 2436 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2437 Mat_SeqSBAIJ *b; 2438 PetscErrorCode ierr; 2439 PetscBool perm_identity,missing; 2440 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2441 const PetscInt *rip,*riip; 2442 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2443 PetscInt nlnk,*lnk,*lnk_lvl=NULL,d; 2444 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2445 PetscReal fill =info->fill,levels=info->levels; 2446 PetscFreeSpaceList free_space =NULL,current_space=NULL; 2447 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 2448 PetscBT lnkbt; 2449 IS iperm; 2450 2451 PetscFunctionBegin; 2452 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2453 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2454 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2455 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2456 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2457 2458 ierr = PetscMalloc1((am+1),&ui);CHKERRQ(ierr); 2459 ierr = PetscMalloc1((am+1),&udiag);CHKERRQ(ierr); 2460 ui[0] = 0; 2461 2462 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2463 if (!levels && perm_identity) { 2464 for (i=0; i<am; i++) { 2465 ncols = ai[i+1] - a->diag[i]; 2466 ui[i+1] = ui[i] + ncols; 2467 udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 2468 } 2469 ierr = PetscMalloc1((ui[am]+1),&uj);CHKERRQ(ierr); 2470 cols = uj; 2471 for (i=0; i<am; i++) { 2472 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2473 ncols = ai[i+1] - a->diag[i] -1; 2474 for (j=0; j<ncols; j++) *cols++ = aj[j]; 2475 *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */ 2476 } 2477 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2478 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2479 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2480 2481 /* initialization */ 2482 ierr = PetscMalloc1((am+1),&ajtmp);CHKERRQ(ierr); 2483 2484 /* jl: linked list for storing indices of the pivot rows 2485 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2486 ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr); 2487 for (i=0; i<am; i++) { 2488 jl[i] = am; il[i] = 0; 2489 } 2490 2491 /* create and initialize a linked list for storing column indices of the active row k */ 2492 nlnk = am + 1; 2493 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2494 2495 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2496 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2497 current_space = free_space; 2498 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr); 2499 current_space_lvl = free_space_lvl; 2500 2501 for (k=0; k<am; k++) { /* for each active row k */ 2502 /* initialize lnk by the column indices of row rip[k] of A */ 2503 nzk = 0; 2504 ncols = ai[rip[k]+1] - ai[rip[k]]; 2505 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2506 ncols_upper = 0; 2507 for (j=0; j<ncols; j++) { 2508 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2509 if (riip[i] >= k) { /* only take upper triangular entry */ 2510 ajtmp[ncols_upper] = i; 2511 ncols_upper++; 2512 } 2513 } 2514 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2515 nzk += nlnk; 2516 2517 /* update lnk by computing fill-in for each pivot row to be merged in */ 2518 prow = jl[k]; /* 1st pivot row */ 2519 2520 while (prow < k) { 2521 nextprow = jl[prow]; 2522 2523 /* merge prow into k-th row */ 2524 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2525 jmax = ui[prow+1]; 2526 ncols = jmax-jmin; 2527 i = jmin - ui[prow]; 2528 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2529 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2530 j = *(uj - 1); 2531 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2532 nzk += nlnk; 2533 2534 /* update il and jl for prow */ 2535 if (jmin < jmax) { 2536 il[prow] = jmin; 2537 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2538 } 2539 prow = nextprow; 2540 } 2541 2542 /* if free space is not available, make more free space */ 2543 if (current_space->local_remaining<nzk) { 2544 i = am - k + 1; /* num of unfactored rows */ 2545 i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2546 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2547 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2548 reallocs++; 2549 } 2550 2551 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2552 if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2553 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2554 2555 /* add the k-th row into il and jl */ 2556 if (nzk > 1) { 2557 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2558 jl[k] = jl[i]; jl[i] = k; 2559 il[k] = ui[k] + 1; 2560 } 2561 uj_ptr[k] = current_space->array; 2562 uj_lvl_ptr[k] = current_space_lvl->array; 2563 2564 current_space->array += nzk; 2565 current_space->local_used += nzk; 2566 current_space->local_remaining -= nzk; 2567 2568 current_space_lvl->array += nzk; 2569 current_space_lvl->local_used += nzk; 2570 current_space_lvl->local_remaining -= nzk; 2571 2572 ui[k+1] = ui[k] + nzk; 2573 } 2574 2575 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2576 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2577 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2578 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2579 2580 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2581 ierr = PetscMalloc1((ui[am]+1),&uj);CHKERRQ(ierr); 2582 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2583 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2584 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2585 2586 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2587 2588 /* put together the new matrix in MATSEQSBAIJ format */ 2589 b = (Mat_SeqSBAIJ*)(fact)->data; 2590 b->singlemalloc = PETSC_FALSE; 2591 2592 ierr = PetscMalloc1((ui[am]+1),&b->a);CHKERRQ(ierr); 2593 2594 b->j = uj; 2595 b->i = ui; 2596 b->diag = udiag; 2597 b->free_diag = PETSC_TRUE; 2598 b->ilen = 0; 2599 b->imax = 0; 2600 b->row = perm; 2601 b->col = perm; 2602 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2603 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2604 b->icol = iperm; 2605 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2606 2607 ierr = PetscMalloc1((am+1),&b->solve_work);CHKERRQ(ierr); 2608 ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2609 2610 b->maxnz = b->nz = ui[am]; 2611 b->free_a = PETSC_TRUE; 2612 b->free_ij = PETSC_TRUE; 2613 2614 fact->info.factor_mallocs = reallocs; 2615 fact->info.fill_ratio_given = fill; 2616 if (ai[am] != 0) { 2617 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2618 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2619 } else { 2620 fact->info.fill_ratio_needed = 0.0; 2621 } 2622 #if defined(PETSC_USE_INFO) 2623 if (ai[am] != 0) { 2624 PetscReal af = fact->info.fill_ratio_needed; 2625 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2626 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2627 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2628 } else { 2629 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2630 } 2631 #endif 2632 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2633 PetscFunctionReturn(0); 2634 } 2635 2636 #undef __FUNCT__ 2637 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace" 2638 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2639 { 2640 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2641 Mat_SeqSBAIJ *b; 2642 PetscErrorCode ierr; 2643 PetscBool perm_identity,missing; 2644 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2645 const PetscInt *rip,*riip; 2646 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2647 PetscInt nlnk,*lnk,*lnk_lvl=NULL,d; 2648 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2649 PetscReal fill =info->fill,levels=info->levels; 2650 PetscFreeSpaceList free_space =NULL,current_space=NULL; 2651 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 2652 PetscBT lnkbt; 2653 IS iperm; 2654 2655 PetscFunctionBegin; 2656 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2657 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2658 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2659 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2660 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2661 2662 ierr = PetscMalloc1((am+1),&ui);CHKERRQ(ierr); 2663 ierr = PetscMalloc1((am+1),&udiag);CHKERRQ(ierr); 2664 ui[0] = 0; 2665 2666 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2667 if (!levels && perm_identity) { 2668 2669 for (i=0; i<am; i++) { 2670 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 2671 udiag[i] = ui[i]; 2672 } 2673 ierr = PetscMalloc1((ui[am]+1),&uj);CHKERRQ(ierr); 2674 cols = uj; 2675 for (i=0; i<am; i++) { 2676 aj = a->j + a->diag[i]; 2677 ncols = ui[i+1] - ui[i]; 2678 for (j=0; j<ncols; j++) *cols++ = *aj++; 2679 } 2680 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2681 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2682 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2683 2684 /* initialization */ 2685 ierr = PetscMalloc1((am+1),&ajtmp);CHKERRQ(ierr); 2686 2687 /* jl: linked list for storing indices of the pivot rows 2688 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2689 ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr); 2690 for (i=0; i<am; i++) { 2691 jl[i] = am; il[i] = 0; 2692 } 2693 2694 /* create and initialize a linked list for storing column indices of the active row k */ 2695 nlnk = am + 1; 2696 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2697 2698 /* initial FreeSpace size is fill*(ai[am]+1) */ 2699 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2700 current_space = free_space; 2701 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2702 current_space_lvl = free_space_lvl; 2703 2704 for (k=0; k<am; k++) { /* for each active row k */ 2705 /* initialize lnk by the column indices of row rip[k] of A */ 2706 nzk = 0; 2707 ncols = ai[rip[k]+1] - ai[rip[k]]; 2708 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2709 ncols_upper = 0; 2710 for (j=0; j<ncols; j++) { 2711 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2712 if (riip[i] >= k) { /* only take upper triangular entry */ 2713 ajtmp[ncols_upper] = i; 2714 ncols_upper++; 2715 } 2716 } 2717 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2718 nzk += nlnk; 2719 2720 /* update lnk by computing fill-in for each pivot row to be merged in */ 2721 prow = jl[k]; /* 1st pivot row */ 2722 2723 while (prow < k) { 2724 nextprow = jl[prow]; 2725 2726 /* merge prow into k-th row */ 2727 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2728 jmax = ui[prow+1]; 2729 ncols = jmax-jmin; 2730 i = jmin - ui[prow]; 2731 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2732 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2733 j = *(uj - 1); 2734 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2735 nzk += nlnk; 2736 2737 /* update il and jl for prow */ 2738 if (jmin < jmax) { 2739 il[prow] = jmin; 2740 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2741 } 2742 prow = nextprow; 2743 } 2744 2745 /* if free space is not available, make more free space */ 2746 if (current_space->local_remaining<nzk) { 2747 i = am - k + 1; /* num of unfactored rows */ 2748 i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2749 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2750 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2751 reallocs++; 2752 } 2753 2754 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2755 if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2756 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2757 2758 /* add the k-th row into il and jl */ 2759 if (nzk > 1) { 2760 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2761 jl[k] = jl[i]; jl[i] = k; 2762 il[k] = ui[k] + 1; 2763 } 2764 uj_ptr[k] = current_space->array; 2765 uj_lvl_ptr[k] = current_space_lvl->array; 2766 2767 current_space->array += nzk; 2768 current_space->local_used += nzk; 2769 current_space->local_remaining -= nzk; 2770 2771 current_space_lvl->array += nzk; 2772 current_space_lvl->local_used += nzk; 2773 current_space_lvl->local_remaining -= nzk; 2774 2775 ui[k+1] = ui[k] + nzk; 2776 } 2777 2778 #if defined(PETSC_USE_INFO) 2779 if (ai[am] != 0) { 2780 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2781 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2782 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2783 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2784 } else { 2785 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2786 } 2787 #endif 2788 2789 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2790 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2791 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2792 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2793 2794 /* destroy list of free space and other temporary array(s) */ 2795 ierr = PetscMalloc1((ui[am]+1),&uj);CHKERRQ(ierr); 2796 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2797 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2798 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2799 2800 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2801 2802 /* put together the new matrix in MATSEQSBAIJ format */ 2803 2804 b = (Mat_SeqSBAIJ*)fact->data; 2805 b->singlemalloc = PETSC_FALSE; 2806 2807 ierr = PetscMalloc1((ui[am]+1),&b->a);CHKERRQ(ierr); 2808 2809 b->j = uj; 2810 b->i = ui; 2811 b->diag = udiag; 2812 b->free_diag = PETSC_TRUE; 2813 b->ilen = 0; 2814 b->imax = 0; 2815 b->row = perm; 2816 b->col = perm; 2817 2818 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2819 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2820 2821 b->icol = iperm; 2822 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2823 ierr = PetscMalloc1((am+1),&b->solve_work);CHKERRQ(ierr); 2824 ierr = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2825 b->maxnz = b->nz = ui[am]; 2826 b->free_a = PETSC_TRUE; 2827 b->free_ij = PETSC_TRUE; 2828 2829 fact->info.factor_mallocs = reallocs; 2830 fact->info.fill_ratio_given = fill; 2831 if (ai[am] != 0) { 2832 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2833 } else { 2834 fact->info.fill_ratio_needed = 0.0; 2835 } 2836 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2837 PetscFunctionReturn(0); 2838 } 2839 2840 #undef __FUNCT__ 2841 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 2842 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2843 { 2844 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2845 Mat_SeqSBAIJ *b; 2846 PetscErrorCode ierr; 2847 PetscBool perm_identity; 2848 PetscReal fill = info->fill; 2849 const PetscInt *rip,*riip; 2850 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2851 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2852 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag; 2853 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2854 PetscBT lnkbt; 2855 IS iperm; 2856 2857 PetscFunctionBegin; 2858 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2859 /* check whether perm is the identity mapping */ 2860 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2861 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2862 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2863 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2864 2865 /* initialization */ 2866 ierr = PetscMalloc1((am+1),&ui);CHKERRQ(ierr); 2867 ierr = PetscMalloc1((am+1),&udiag);CHKERRQ(ierr); 2868 ui[0] = 0; 2869 2870 /* jl: linked list for storing indices of the pivot rows 2871 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2872 ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr); 2873 for (i=0; i<am; i++) { 2874 jl[i] = am; il[i] = 0; 2875 } 2876 2877 /* create and initialize a linked list for storing column indices of the active row k */ 2878 nlnk = am + 1; 2879 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2880 2881 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2882 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2883 current_space = free_space; 2884 2885 for (k=0; k<am; k++) { /* for each active row k */ 2886 /* initialize lnk by the column indices of row rip[k] of A */ 2887 nzk = 0; 2888 ncols = ai[rip[k]+1] - ai[rip[k]]; 2889 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2890 ncols_upper = 0; 2891 for (j=0; j<ncols; j++) { 2892 i = riip[*(aj + ai[rip[k]] + j)]; 2893 if (i >= k) { /* only take upper triangular entry */ 2894 cols[ncols_upper] = i; 2895 ncols_upper++; 2896 } 2897 } 2898 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2899 nzk += nlnk; 2900 2901 /* update lnk by computing fill-in for each pivot row to be merged in */ 2902 prow = jl[k]; /* 1st pivot row */ 2903 2904 while (prow < k) { 2905 nextprow = jl[prow]; 2906 /* merge prow into k-th row */ 2907 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2908 jmax = ui[prow+1]; 2909 ncols = jmax-jmin; 2910 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2911 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2912 nzk += nlnk; 2913 2914 /* update il and jl for prow */ 2915 if (jmin < jmax) { 2916 il[prow] = jmin; 2917 j = *uj_ptr; 2918 jl[prow] = jl[j]; 2919 jl[j] = prow; 2920 } 2921 prow = nextprow; 2922 } 2923 2924 /* if free space is not available, make more free space */ 2925 if (current_space->local_remaining<nzk) { 2926 i = am - k + 1; /* num of unfactored rows */ 2927 i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2928 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2929 reallocs++; 2930 } 2931 2932 /* copy data into free space, then initialize lnk */ 2933 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 2934 2935 /* add the k-th row into il and jl */ 2936 if (nzk > 1) { 2937 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2938 jl[k] = jl[i]; jl[i] = k; 2939 il[k] = ui[k] + 1; 2940 } 2941 ui_ptr[k] = current_space->array; 2942 2943 current_space->array += nzk; 2944 current_space->local_used += nzk; 2945 current_space->local_remaining -= nzk; 2946 2947 ui[k+1] = ui[k] + nzk; 2948 } 2949 2950 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2951 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2952 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 2953 2954 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2955 ierr = PetscMalloc1((ui[am]+1),&uj);CHKERRQ(ierr); 2956 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2957 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2958 2959 /* put together the new matrix in MATSEQSBAIJ format */ 2960 2961 b = (Mat_SeqSBAIJ*)fact->data; 2962 b->singlemalloc = PETSC_FALSE; 2963 b->free_a = PETSC_TRUE; 2964 b->free_ij = PETSC_TRUE; 2965 2966 ierr = PetscMalloc1((ui[am]+1),&b->a);CHKERRQ(ierr); 2967 2968 b->j = uj; 2969 b->i = ui; 2970 b->diag = udiag; 2971 b->free_diag = PETSC_TRUE; 2972 b->ilen = 0; 2973 b->imax = 0; 2974 b->row = perm; 2975 b->col = perm; 2976 2977 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2978 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2979 2980 b->icol = iperm; 2981 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2982 2983 ierr = PetscMalloc1((am+1),&b->solve_work);CHKERRQ(ierr); 2984 ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2985 2986 b->maxnz = b->nz = ui[am]; 2987 2988 fact->info.factor_mallocs = reallocs; 2989 fact->info.fill_ratio_given = fill; 2990 if (ai[am] != 0) { 2991 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2992 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2993 } else { 2994 fact->info.fill_ratio_needed = 0.0; 2995 } 2996 #if defined(PETSC_USE_INFO) 2997 if (ai[am] != 0) { 2998 PetscReal af = fact->info.fill_ratio_needed; 2999 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 3000 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 3001 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 3002 } else { 3003 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 3004 } 3005 #endif 3006 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 3007 PetscFunctionReturn(0); 3008 } 3009 3010 #undef __FUNCT__ 3011 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace" 3012 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 3013 { 3014 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3015 Mat_SeqSBAIJ *b; 3016 PetscErrorCode ierr; 3017 PetscBool perm_identity; 3018 PetscReal fill = info->fill; 3019 const PetscInt *rip,*riip; 3020 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 3021 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 3022 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 3023 PetscFreeSpaceList free_space=NULL,current_space=NULL; 3024 PetscBT lnkbt; 3025 IS iperm; 3026 3027 PetscFunctionBegin; 3028 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 3029 /* check whether perm is the identity mapping */ 3030 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 3031 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 3032 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 3033 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 3034 3035 /* initialization */ 3036 ierr = PetscMalloc1((am+1),&ui);CHKERRQ(ierr); 3037 ui[0] = 0; 3038 3039 /* jl: linked list for storing indices of the pivot rows 3040 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 3041 ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr); 3042 for (i=0; i<am; i++) { 3043 jl[i] = am; il[i] = 0; 3044 } 3045 3046 /* create and initialize a linked list for storing column indices of the active row k */ 3047 nlnk = am + 1; 3048 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3049 3050 /* initial FreeSpace size is fill*(ai[am]+1) */ 3051 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 3052 current_space = free_space; 3053 3054 for (k=0; k<am; k++) { /* for each active row k */ 3055 /* initialize lnk by the column indices of row rip[k] of A */ 3056 nzk = 0; 3057 ncols = ai[rip[k]+1] - ai[rip[k]]; 3058 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 3059 ncols_upper = 0; 3060 for (j=0; j<ncols; j++) { 3061 i = riip[*(aj + ai[rip[k]] + j)]; 3062 if (i >= k) { /* only take upper triangular entry */ 3063 cols[ncols_upper] = i; 3064 ncols_upper++; 3065 } 3066 } 3067 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3068 nzk += nlnk; 3069 3070 /* update lnk by computing fill-in for each pivot row to be merged in */ 3071 prow = jl[k]; /* 1st pivot row */ 3072 3073 while (prow < k) { 3074 nextprow = jl[prow]; 3075 /* merge prow into k-th row */ 3076 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3077 jmax = ui[prow+1]; 3078 ncols = jmax-jmin; 3079 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3080 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3081 nzk += nlnk; 3082 3083 /* update il and jl for prow */ 3084 if (jmin < jmax) { 3085 il[prow] = jmin; 3086 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 3087 } 3088 prow = nextprow; 3089 } 3090 3091 /* if free space is not available, make more free space */ 3092 if (current_space->local_remaining<nzk) { 3093 i = am - k + 1; /* num of unfactored rows */ 3094 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3095 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 3096 reallocs++; 3097 } 3098 3099 /* copy data into free space, then initialize lnk */ 3100 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3101 3102 /* add the k-th row into il and jl */ 3103 if (nzk-1 > 0) { 3104 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3105 jl[k] = jl[i]; jl[i] = k; 3106 il[k] = ui[k] + 1; 3107 } 3108 ui_ptr[k] = current_space->array; 3109 3110 current_space->array += nzk; 3111 current_space->local_used += nzk; 3112 current_space->local_remaining -= nzk; 3113 3114 ui[k+1] = ui[k] + nzk; 3115 } 3116 3117 #if defined(PETSC_USE_INFO) 3118 if (ai[am] != 0) { 3119 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 3120 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 3121 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 3122 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 3123 } else { 3124 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 3125 } 3126 #endif 3127 3128 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 3129 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 3130 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 3131 3132 /* destroy list of free space and other temporary array(s) */ 3133 ierr = PetscMalloc1((ui[am]+1),&uj);CHKERRQ(ierr); 3134 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 3135 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3136 3137 /* put together the new matrix in MATSEQSBAIJ format */ 3138 3139 b = (Mat_SeqSBAIJ*)fact->data; 3140 b->singlemalloc = PETSC_FALSE; 3141 b->free_a = PETSC_TRUE; 3142 b->free_ij = PETSC_TRUE; 3143 3144 ierr = PetscMalloc1((ui[am]+1),&b->a);CHKERRQ(ierr); 3145 3146 b->j = uj; 3147 b->i = ui; 3148 b->diag = 0; 3149 b->ilen = 0; 3150 b->imax = 0; 3151 b->row = perm; 3152 b->col = perm; 3153 3154 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3155 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3156 3157 b->icol = iperm; 3158 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3159 3160 ierr = PetscMalloc1((am+1),&b->solve_work);CHKERRQ(ierr); 3161 ierr = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3162 b->maxnz = b->nz = ui[am]; 3163 3164 fact->info.factor_mallocs = reallocs; 3165 fact->info.fill_ratio_given = fill; 3166 if (ai[am] != 0) { 3167 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 3168 } else { 3169 fact->info.fill_ratio_needed = 0.0; 3170 } 3171 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3172 PetscFunctionReturn(0); 3173 } 3174 3175 #undef __FUNCT__ 3176 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 3177 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 3178 { 3179 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3180 PetscErrorCode ierr; 3181 PetscInt n = A->rmap->n; 3182 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 3183 PetscScalar *x,sum; 3184 const PetscScalar *b; 3185 const MatScalar *aa = a->a,*v; 3186 PetscInt i,nz; 3187 3188 PetscFunctionBegin; 3189 if (!n) PetscFunctionReturn(0); 3190 3191 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3192 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3193 3194 /* forward solve the lower triangular */ 3195 x[0] = b[0]; 3196 v = aa; 3197 vi = aj; 3198 for (i=1; i<n; i++) { 3199 nz = ai[i+1] - ai[i]; 3200 sum = b[i]; 3201 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3202 v += nz; 3203 vi += nz; 3204 x[i] = sum; 3205 } 3206 3207 /* backward solve the upper triangular */ 3208 for (i=n-1; i>=0; i--) { 3209 v = aa + adiag[i+1] + 1; 3210 vi = aj + adiag[i+1] + 1; 3211 nz = adiag[i] - adiag[i+1]-1; 3212 sum = x[i]; 3213 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3214 x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3215 } 3216 3217 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 3218 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3219 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3220 PetscFunctionReturn(0); 3221 } 3222 3223 #undef __FUNCT__ 3224 #define __FUNCT__ "MatSolve_SeqAIJ" 3225 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 3226 { 3227 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3228 IS iscol = a->col,isrow = a->row; 3229 PetscErrorCode ierr; 3230 PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz; 3231 const PetscInt *rout,*cout,*r,*c; 3232 PetscScalar *x,*tmp,sum; 3233 const PetscScalar *b; 3234 const MatScalar *aa = a->a,*v; 3235 3236 PetscFunctionBegin; 3237 if (!n) PetscFunctionReturn(0); 3238 3239 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3240 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3241 tmp = a->solve_work; 3242 3243 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 3244 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 3245 3246 /* forward solve the lower triangular */ 3247 tmp[0] = b[r[0]]; 3248 v = aa; 3249 vi = aj; 3250 for (i=1; i<n; i++) { 3251 nz = ai[i+1] - ai[i]; 3252 sum = b[r[i]]; 3253 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3254 tmp[i] = sum; 3255 v += nz; vi += nz; 3256 } 3257 3258 /* backward solve the upper triangular */ 3259 for (i=n-1; i>=0; i--) { 3260 v = aa + adiag[i+1]+1; 3261 vi = aj + adiag[i+1]+1; 3262 nz = adiag[i]-adiag[i+1]-1; 3263 sum = tmp[i]; 3264 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3265 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 3266 } 3267 3268 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 3269 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 3270 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3271 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3272 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 3273 PetscFunctionReturn(0); 3274 } 3275 3276 #undef __FUNCT__ 3277 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 3278 /* 3279 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3280 */ 3281 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 3282 { 3283 Mat B = *fact; 3284 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 3285 IS isicol; 3286 PetscErrorCode ierr; 3287 const PetscInt *r,*ic; 3288 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 3289 PetscInt *bi,*bj,*bdiag,*bdiag_rev; 3290 PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au; 3291 PetscInt nlnk,*lnk; 3292 PetscBT lnkbt; 3293 PetscBool row_identity,icol_identity; 3294 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp; 3295 const PetscInt *ics; 3296 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 3297 PetscReal dt =info->dt,shift=info->shiftamount; 3298 PetscInt dtcount=(PetscInt)info->dtcount,nnz_max; 3299 PetscBool missing; 3300 3301 PetscFunctionBegin; 3302 if (dt == PETSC_DEFAULT) dt = 0.005; 3303 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); 3304 3305 /* ------- symbolic factorization, can be reused ---------*/ 3306 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 3307 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 3308 adiag=a->diag; 3309 3310 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 3311 3312 /* bdiag is location of diagonal in factor */ 3313 ierr = PetscMalloc1((n+1),&bdiag);CHKERRQ(ierr); /* becomes b->diag */ 3314 ierr = PetscMalloc1((n+1),&bdiag_rev);CHKERRQ(ierr); /* temporary */ 3315 3316 /* allocate row pointers bi */ 3317 ierr = PetscMalloc1((2*n+2),&bi);CHKERRQ(ierr); 3318 3319 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3320 if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */ 3321 nnz_max = ai[n]+2*n*dtcount+2; 3322 3323 ierr = PetscMalloc1((nnz_max+1),&bj);CHKERRQ(ierr); 3324 ierr = PetscMalloc1((nnz_max+1),&ba);CHKERRQ(ierr); 3325 3326 /* put together the new matrix */ 3327 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 3328 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 3329 b = (Mat_SeqAIJ*)B->data; 3330 3331 b->free_a = PETSC_TRUE; 3332 b->free_ij = PETSC_TRUE; 3333 b->singlemalloc = PETSC_FALSE; 3334 3335 b->a = ba; 3336 b->j = bj; 3337 b->i = bi; 3338 b->diag = bdiag; 3339 b->ilen = 0; 3340 b->imax = 0; 3341 b->row = isrow; 3342 b->col = iscol; 3343 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3344 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3345 b->icol = isicol; 3346 3347 ierr = PetscMalloc1((n+1),&b->solve_work);CHKERRQ(ierr); 3348 ierr = PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3349 b->maxnz = nnz_max; 3350 3351 B->factortype = MAT_FACTOR_ILUDT; 3352 B->info.factor_mallocs = 0; 3353 B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 3354 /* ------- end of symbolic factorization ---------*/ 3355 3356 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3357 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3358 ics = ic; 3359 3360 /* linked list for storing column indices of the active row */ 3361 nlnk = n + 1; 3362 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3363 3364 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3365 ierr = PetscMalloc2(n,&im,n,&jtmp);CHKERRQ(ierr); 3366 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3367 ierr = PetscMalloc2(n,&rtmp,n,&vtmp);CHKERRQ(ierr); 3368 ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr); 3369 3370 bi[0] = 0; 3371 bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */ 3372 bdiag_rev[n] = bdiag[0]; 3373 bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */ 3374 for (i=0; i<n; i++) { 3375 /* copy initial fill into linked list */ 3376 nzi = ai[r[i]+1] - ai[r[i]]; 3377 if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 3378 nzi_al = adiag[r[i]] - ai[r[i]]; 3379 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 3380 ajtmp = aj + ai[r[i]]; 3381 ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3382 3383 /* load in initial (unfactored row) */ 3384 aatmp = a->a + ai[r[i]]; 3385 for (j=0; j<nzi; j++) { 3386 rtmp[ics[*ajtmp++]] = *aatmp++; 3387 } 3388 3389 /* add pivot rows into linked list */ 3390 row = lnk[n]; 3391 while (row < i) { 3392 nzi_bl = bi[row+1] - bi[row] + 1; 3393 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 3394 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 3395 nzi += nlnk; 3396 row = lnk[row]; 3397 } 3398 3399 /* copy data from lnk into jtmp, then initialize lnk */ 3400 ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 3401 3402 /* numerical factorization */ 3403 bjtmp = jtmp; 3404 row = *bjtmp++; /* 1st pivot row */ 3405 while (row < i) { 3406 pc = rtmp + row; 3407 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3408 multiplier = (*pc) * (*pv); 3409 *pc = multiplier; 3410 if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 3411 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3412 pv = ba + bdiag[row+1] + 1; 3413 /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */ 3414 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3415 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3416 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3417 } 3418 row = *bjtmp++; 3419 } 3420 3421 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3422 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3423 nzi_bl = 0; j = 0; 3424 while (jtmp[j] < i) { /* Note: jtmp is sorted */ 3425 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3426 nzi_bl++; j++; 3427 } 3428 nzi_bu = nzi - nzi_bl -1; 3429 while (j < nzi) { 3430 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3431 j++; 3432 } 3433 3434 bjtmp = bj + bi[i]; 3435 batmp = ba + bi[i]; 3436 /* apply level dropping rule to L part */ 3437 ncut = nzi_al + dtcount; 3438 if (ncut < nzi_bl) { 3439 ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 3440 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 3441 } else { 3442 ncut = nzi_bl; 3443 } 3444 for (j=0; j<ncut; j++) { 3445 bjtmp[j] = jtmp[j]; 3446 batmp[j] = vtmp[j]; 3447 /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */ 3448 } 3449 bi[i+1] = bi[i] + ncut; 3450 nzi = ncut + 1; 3451 3452 /* apply level dropping rule to U part */ 3453 ncut = nzi_au + dtcount; 3454 if (ncut < nzi_bu) { 3455 ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 3456 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 3457 } else { 3458 ncut = nzi_bu; 3459 } 3460 nzi += ncut; 3461 3462 /* mark bdiagonal */ 3463 bdiag[i+1] = bdiag[i] - (ncut + 1); 3464 bdiag_rev[n-i-1] = bdiag[i+1]; 3465 bi[2*n - i] = bi[2*n - i +1] - (ncut + 1); 3466 bjtmp = bj + bdiag[i]; 3467 batmp = ba + bdiag[i]; 3468 *bjtmp = i; 3469 *batmp = diag_tmp; /* rtmp[i]; */ 3470 if (*batmp == 0.0) { 3471 *batmp = dt+shift; 3472 /* printf(" row %d add shift %g\n",i,shift); */ 3473 } 3474 *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */ 3475 /* printf(" (%d,%g),",*bjtmp,*batmp); */ 3476 3477 bjtmp = bj + bdiag[i+1]+1; 3478 batmp = ba + bdiag[i+1]+1; 3479 for (k=0; k<ncut; k++) { 3480 bjtmp[k] = jtmp[nzi_bl+1+k]; 3481 batmp[k] = vtmp[nzi_bl+1+k]; 3482 /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */ 3483 } 3484 /* printf("\n"); */ 3485 3486 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3487 /* 3488 printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]); 3489 printf(" ----------------------------\n"); 3490 */ 3491 } /* for (i=0; i<n; i++) */ 3492 /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */ 3493 if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 3494 3495 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3496 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3497 3498 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3499 ierr = PetscFree2(im,jtmp);CHKERRQ(ierr); 3500 ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr); 3501 ierr = PetscFree(bdiag_rev);CHKERRQ(ierr); 3502 3503 ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 3504 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3505 3506 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3507 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 3508 if (row_identity && icol_identity) { 3509 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3510 } else { 3511 B->ops->solve = MatSolve_SeqAIJ; 3512 } 3513 3514 B->ops->solveadd = 0; 3515 B->ops->solvetranspose = 0; 3516 B->ops->solvetransposeadd = 0; 3517 B->ops->matsolve = 0; 3518 B->assembled = PETSC_TRUE; 3519 B->preallocated = PETSC_TRUE; 3520 PetscFunctionReturn(0); 3521 } 3522 3523 /* a wraper of MatILUDTFactor_SeqAIJ() */ 3524 #undef __FUNCT__ 3525 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ" 3526 /* 3527 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3528 */ 3529 3530 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 3531 { 3532 PetscErrorCode ierr; 3533 3534 PetscFunctionBegin; 3535 ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr); 3536 PetscFunctionReturn(0); 3537 } 3538 3539 /* 3540 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3541 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3542 */ 3543 #undef __FUNCT__ 3544 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ" 3545 /* 3546 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3547 */ 3548 3549 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 3550 { 3551 Mat C =fact; 3552 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 3553 IS isrow = b->row,isicol = b->icol; 3554 PetscErrorCode ierr; 3555 const PetscInt *r,*ic,*ics; 3556 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 3557 PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 3558 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 3559 PetscReal dt=info->dt,shift=info->shiftamount; 3560 PetscBool row_identity, col_identity; 3561 3562 PetscFunctionBegin; 3563 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3564 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3565 ierr = PetscMalloc1((n+1),&rtmp);CHKERRQ(ierr); 3566 ics = ic; 3567 3568 for (i=0; i<n; i++) { 3569 /* initialize rtmp array */ 3570 nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 3571 bjtmp = bj + bi[i]; 3572 for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 3573 rtmp[i] = 0.0; 3574 nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 3575 bjtmp = bj + bdiag[i+1] + 1; 3576 for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 3577 3578 /* load in initial unfactored row of A */ 3579 /* printf("row %d\n",i); */ 3580 nz = ai[r[i]+1] - ai[r[i]]; 3581 ajtmp = aj + ai[r[i]]; 3582 v = aa + ai[r[i]]; 3583 for (j=0; j<nz; j++) { 3584 rtmp[ics[*ajtmp++]] = v[j]; 3585 /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */ 3586 } 3587 /* printf("\n"); */ 3588 3589 /* numerical factorization */ 3590 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3591 nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 3592 k = 0; 3593 while (k < nzl) { 3594 row = *bjtmp++; 3595 /* printf(" prow %d\n",row); */ 3596 pc = rtmp + row; 3597 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3598 multiplier = (*pc) * (*pv); 3599 *pc = multiplier; 3600 if (PetscAbsScalar(multiplier) > dt) { 3601 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3602 pv = b->a + bdiag[row+1] + 1; 3603 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3604 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3605 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3606 } 3607 k++; 3608 } 3609 3610 /* finished row so stick it into b->a */ 3611 /* L-part */ 3612 pv = b->a + bi[i]; 3613 pj = bj + bi[i]; 3614 nzl = bi[i+1] - bi[i]; 3615 for (j=0; j<nzl; j++) { 3616 pv[j] = rtmp[pj[j]]; 3617 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3618 } 3619 3620 /* diagonal: invert diagonal entries for simplier triangular solves */ 3621 if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 3622 b->a[bdiag[i]] = 1.0/rtmp[i]; 3623 /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */ 3624 3625 /* U-part */ 3626 pv = b->a + bdiag[i+1] + 1; 3627 pj = bj + bdiag[i+1] + 1; 3628 nzu = bdiag[i] - bdiag[i+1] - 1; 3629 for (j=0; j<nzu; j++) { 3630 pv[j] = rtmp[pj[j]]; 3631 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3632 } 3633 /* printf("\n"); */ 3634 } 3635 3636 ierr = PetscFree(rtmp);CHKERRQ(ierr); 3637 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3638 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3639 3640 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3641 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 3642 if (row_identity && col_identity) { 3643 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3644 } else { 3645 C->ops->solve = MatSolve_SeqAIJ; 3646 } 3647 C->ops->solveadd = 0; 3648 C->ops->solvetranspose = 0; 3649 C->ops->solvetransposeadd = 0; 3650 C->ops->matsolve = 0; 3651 C->assembled = PETSC_TRUE; 3652 C->preallocated = PETSC_TRUE; 3653 3654 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 3655 PetscFunctionReturn(0); 3656 } 3657