1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscbt.h> 5 #include <../src/mat/utils/freespace.h> 6 7 EXTERN_C_BEGIN 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ" 10 /* 11 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 12 13 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 14 */ 15 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 16 { 17 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data; 18 PetscErrorCode ierr; 19 PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order; 20 const PetscInt *ai = a->i, *aj = a->j; 21 const PetscScalar *aa = a->a; 22 PetscBool *done; 23 PetscReal best,past = 0,future; 24 25 PetscFunctionBegin; 26 /* pick initial row */ 27 best = -1; 28 for (i=0; i<n; i++) { 29 future = 0.0; 30 for (j=ai[i]; j<ai[i+1]; j++) { 31 if (aj[j] != i) future += PetscAbsScalar(aa[j]); 32 else past = PetscAbsScalar(aa[j]); 33 } 34 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 35 if (past/future > best) { 36 best = past/future; 37 current = i; 38 } 39 } 40 41 ierr = PetscMalloc(n*sizeof(PetscBool),&done);CHKERRQ(ierr); 42 ierr = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr); 43 ierr = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr); 44 order[0] = current; 45 for (i=0; i<n-1; i++) { 46 done[current] = PETSC_TRUE; 47 best = -1; 48 /* loop over all neighbors of current pivot */ 49 for (j=ai[current]; j<ai[current+1]; j++) { 50 jj = aj[j]; 51 if (done[jj]) continue; 52 /* loop over columns of potential next row computing weights for below and above diagonal */ 53 past = future = 0.0; 54 for (k=ai[jj]; k<ai[jj+1]; k++) { 55 kk = aj[k]; 56 if (done[kk]) past += PetscAbsScalar(aa[k]); 57 else if (kk != jj) future += PetscAbsScalar(aa[k]); 58 } 59 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 60 if (past/future > best) { 61 best = past/future; 62 newcurrent = jj; 63 } 64 } 65 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 66 best = -1; 67 for (k=0; k<n; k++) { 68 if (done[k]) continue; 69 future = 0.0; 70 past = 0.0; 71 for (j=ai[k]; j<ai[k+1]; j++) { 72 kk = aj[j]; 73 if (done[kk]) past += PetscAbsScalar(aa[j]); 74 else if (kk != k) future += PetscAbsScalar(aa[j]); 75 } 76 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 77 if (past/future > best) { 78 best = past/future; 79 newcurrent = k; 80 } 81 } 82 } 83 if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current"); 84 current = newcurrent; 85 order[i+1] = current; 86 } 87 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr); 88 *icol = *irow; 89 ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr); 90 ierr = PetscFree(done);CHKERRQ(ierr); 91 ierr = PetscFree(order);CHKERRQ(ierr); 92 PetscFunctionReturn(0); 93 } 94 EXTERN_C_END 95 96 EXTERN_C_BEGIN 97 #undef __FUNCT__ 98 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 99 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool *flg) 100 { 101 PetscFunctionBegin; 102 *flg = PETSC_TRUE; 103 PetscFunctionReturn(0); 104 } 105 EXTERN_C_END 106 107 EXTERN_C_BEGIN 108 #undef __FUNCT__ 109 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 110 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 111 { 112 PetscInt n = A->rmap->n; 113 PetscErrorCode ierr; 114 115 PetscFunctionBegin; 116 #if defined(PETSC_USE_COMPLEX) 117 if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported"); 118 #endif 119 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 120 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 121 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 122 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 123 124 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 125 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 126 127 ierr = MatSetBlockSizes(*B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 128 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 129 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 130 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 131 132 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 133 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 134 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 135 (*B)->factortype = ftype; 136 PetscFunctionReturn(0); 137 } 138 EXTERN_C_END 139 140 #undef __FUNCT__ 141 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace" 142 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 143 { 144 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 145 IS isicol; 146 PetscErrorCode ierr; 147 const PetscInt *r,*ic; 148 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 149 PetscInt *bi,*bj,*ajtmp; 150 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 151 PetscReal f; 152 PetscInt nlnk,*lnk,k,**bi_ptr; 153 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 154 PetscBT lnkbt; 155 156 PetscFunctionBegin; 157 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 158 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 159 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 160 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 161 162 /* get new row pointers */ 163 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 164 bi[0] = 0; 165 166 /* bdiag is location of diagonal in factor */ 167 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 168 bdiag[0] = 0; 169 170 /* linked list for storing column indices of the active row */ 171 nlnk = n + 1; 172 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 173 174 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 175 176 /* initial FreeSpace size is f*(ai[n]+1) */ 177 f = info->fill; 178 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 179 current_space = free_space; 180 181 for (i=0; i<n; i++) { 182 /* copy previous fill into linked list */ 183 nzi = 0; 184 nnz = ai[r[i]+1] - ai[r[i]]; 185 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 186 ajtmp = aj + ai[r[i]]; 187 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 188 nzi += nlnk; 189 190 /* add pivot rows into linked list */ 191 row = lnk[n]; 192 while (row < i) { 193 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 194 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 195 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 196 nzi += nlnk; 197 row = lnk[row]; 198 } 199 bi[i+1] = bi[i] + nzi; 200 im[i] = nzi; 201 202 /* mark bdiag */ 203 nzbd = 0; 204 nnz = nzi; 205 k = lnk[n]; 206 while (nnz-- && k < i) { 207 nzbd++; 208 k = lnk[k]; 209 } 210 bdiag[i] = bi[i] + nzbd; 211 212 /* if free space is not available, make more free space */ 213 if (current_space->local_remaining<nzi) { 214 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 215 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 216 reallocs++; 217 } 218 219 /* copy data into free space, then initialize lnk */ 220 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 221 222 bi_ptr[i] = current_space->array; 223 current_space->array += nzi; 224 current_space->local_used += nzi; 225 current_space->local_remaining -= nzi; 226 } 227 #if defined(PETSC_USE_INFO) 228 if (ai[n] != 0) { 229 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 230 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 231 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 232 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 233 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 234 } else { 235 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 236 } 237 #endif 238 239 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 240 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 241 242 /* destroy list of free space and other temporary array(s) */ 243 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 244 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 245 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 246 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 247 248 /* put together the new matrix */ 249 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 250 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 251 b = (Mat_SeqAIJ*)(B)->data; 252 253 b->free_a = PETSC_TRUE; 254 b->free_ij = PETSC_TRUE; 255 b->singlemalloc = PETSC_FALSE; 256 257 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 258 b->j = bj; 259 b->i = bi; 260 b->diag = bdiag; 261 b->ilen = 0; 262 b->imax = 0; 263 b->row = isrow; 264 b->col = iscol; 265 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 266 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 267 b->icol = isicol; 268 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 269 270 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 271 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 272 b->maxnz = b->nz = bi[n]; 273 274 (B)->factortype = MAT_FACTOR_LU; 275 (B)->info.factor_mallocs = reallocs; 276 (B)->info.fill_ratio_given = f; 277 278 if (ai[n]) { 279 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 280 } else { 281 (B)->info.fill_ratio_needed = 0.0; 282 } 283 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 284 if (a->inode.size) { 285 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 286 } 287 PetscFunctionReturn(0); 288 } 289 290 #undef __FUNCT__ 291 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 292 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 293 { 294 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 295 IS isicol; 296 PetscErrorCode ierr; 297 const PetscInt *r,*ic; 298 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 299 PetscInt *bi,*bj,*ajtmp; 300 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 301 PetscReal f; 302 PetscInt nlnk,*lnk,k,**bi_ptr; 303 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 304 PetscBT lnkbt; 305 306 PetscFunctionBegin; 307 /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */ 308 /* 309 PetscBool olddatastruct=PETSC_FALSE; 310 ierr = PetscOptionsGetBool(PETSC_NULL,"-lu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr); 311 if (olddatastruct) { 312 ierr = MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);CHKERRQ(ierr); 313 PetscFunctionReturn(0); 314 } 315 */ 316 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 317 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 318 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 319 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 320 321 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 322 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 323 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 324 bi[0] = bdiag[0] = 0; 325 326 /* linked list for storing column indices of the active row */ 327 nlnk = n + 1; 328 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 329 330 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 331 332 /* initial FreeSpace size is f*(ai[n]+1) */ 333 f = info->fill; 334 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 335 current_space = free_space; 336 337 for (i=0; i<n; i++) { 338 /* copy previous fill into linked list */ 339 nzi = 0; 340 nnz = ai[r[i]+1] - ai[r[i]]; 341 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 342 ajtmp = aj + ai[r[i]]; 343 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 344 nzi += nlnk; 345 346 /* add pivot rows into linked list */ 347 row = lnk[n]; 348 while (row < i) { 349 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 350 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 351 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 352 nzi += nlnk; 353 row = lnk[row]; 354 } 355 bi[i+1] = bi[i] + nzi; 356 im[i] = nzi; 357 358 /* mark bdiag */ 359 nzbd = 0; 360 nnz = nzi; 361 k = lnk[n]; 362 while (nnz-- && k < i) { 363 nzbd++; 364 k = lnk[k]; 365 } 366 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 367 368 /* if free space is not available, make more free space */ 369 if (current_space->local_remaining<nzi) { 370 nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */ 371 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 372 reallocs++; 373 } 374 375 /* copy data into free space, then initialize lnk */ 376 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 377 378 bi_ptr[i] = current_space->array; 379 current_space->array += nzi; 380 current_space->local_used += nzi; 381 current_space->local_remaining -= nzi; 382 } 383 384 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 385 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 386 387 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 388 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 389 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 390 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 391 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 392 393 /* put together the new matrix */ 394 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 395 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 396 b = (Mat_SeqAIJ*)(B)->data; 397 398 b->free_a = PETSC_TRUE; 399 b->free_ij = PETSC_TRUE; 400 b->singlemalloc = PETSC_FALSE; 401 402 ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 403 404 b->j = bj; 405 b->i = bi; 406 b->diag = bdiag; 407 b->ilen = 0; 408 b->imax = 0; 409 b->row = isrow; 410 b->col = iscol; 411 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 412 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 413 b->icol = isicol; 414 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 415 416 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 417 ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 418 b->maxnz = b->nz = bdiag[0]+1; 419 420 B->factortype = MAT_FACTOR_LU; 421 B->info.factor_mallocs = reallocs; 422 B->info.fill_ratio_given = f; 423 424 if (ai[n]) { 425 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 426 } else { 427 B->info.fill_ratio_needed = 0.0; 428 } 429 #if defined(PETSC_USE_INFO) 430 if (ai[n] != 0) { 431 PetscReal af = B->info.fill_ratio_needed; 432 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 433 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 434 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 435 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 436 } else { 437 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 438 } 439 #endif 440 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 441 if (a->inode.size) { 442 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 443 } 444 PetscFunctionReturn(0); 445 } 446 447 /* 448 Trouble in factorization, should we dump the original matrix? 449 */ 450 #undef __FUNCT__ 451 #define __FUNCT__ "MatFactorDumpMatrix" 452 PetscErrorCode MatFactorDumpMatrix(Mat A) 453 { 454 PetscErrorCode ierr; 455 PetscBool flg = PETSC_FALSE; 456 457 PetscFunctionBegin; 458 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr); 459 if (flg) { 460 PetscViewer viewer; 461 char filename[PETSC_MAX_PATH_LEN]; 462 463 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 464 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 465 ierr = MatView(A,viewer);CHKERRQ(ierr); 466 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 467 } 468 PetscFunctionReturn(0); 469 } 470 471 #undef __FUNCT__ 472 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 473 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 474 { 475 Mat C =B; 476 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 477 IS isrow = b->row,isicol = b->icol; 478 PetscErrorCode ierr; 479 const PetscInt *r,*ic,*ics; 480 const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag; 481 PetscInt i,j,k,nz,nzL,row,*pj; 482 const PetscInt *ajtmp,*bjtmp; 483 MatScalar *rtmp,*pc,multiplier,*pv; 484 const MatScalar *aa=a->a,*v; 485 PetscBool row_identity,col_identity; 486 FactorShiftCtx sctx; 487 const PetscInt *ddiag; 488 PetscReal rs; 489 MatScalar d; 490 491 PetscFunctionBegin; 492 /* MatPivotSetUp(): initialize shift context sctx */ 493 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 494 495 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 496 ddiag = a->diag; 497 sctx.shift_top = info->zeropivot; 498 for (i=0; i<n; i++) { 499 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 500 d = (aa)[ddiag[i]]; 501 rs = -PetscAbsScalar(d) - PetscRealPart(d); 502 v = aa+ai[i]; 503 nz = ai[i+1] - ai[i]; 504 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 505 if (rs>sctx.shift_top) sctx.shift_top = rs; 506 } 507 sctx.shift_top *= 1.1; 508 sctx.nshift_max = 5; 509 sctx.shift_lo = 0.; 510 sctx.shift_hi = 1.; 511 } 512 513 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 514 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 515 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 516 ics = ic; 517 518 do { 519 sctx.newshift = PETSC_FALSE; 520 for (i=0; i<n; i++) { 521 /* zero rtmp */ 522 /* L part */ 523 nz = bi[i+1] - bi[i]; 524 bjtmp = bj + bi[i]; 525 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 526 527 /* U part */ 528 nz = bdiag[i]-bdiag[i+1]; 529 bjtmp = bj + bdiag[i+1]+1; 530 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 531 532 /* load in initial (unfactored row) */ 533 nz = ai[r[i]+1] - ai[r[i]]; 534 ajtmp = aj + ai[r[i]]; 535 v = aa + ai[r[i]]; 536 for (j=0; j<nz; j++) { 537 rtmp[ics[ajtmp[j]]] = v[j]; 538 } 539 /* ZeropivotApply() */ 540 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 541 542 /* elimination */ 543 bjtmp = bj + bi[i]; 544 row = *bjtmp++; 545 nzL = bi[i+1] - bi[i]; 546 for (k=0; k < nzL; k++) { 547 pc = rtmp + row; 548 if (*pc != 0.0) { 549 pv = b->a + bdiag[row]; 550 multiplier = *pc * (*pv); 551 *pc = multiplier; 552 553 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 554 pv = b->a + bdiag[row+1]+1; 555 nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */ 556 557 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 558 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 559 } 560 row = *bjtmp++; 561 } 562 563 /* finished row so stick it into b->a */ 564 rs = 0.0; 565 /* L part */ 566 pv = b->a + bi[i]; 567 pj = b->j + bi[i]; 568 nz = bi[i+1] - bi[i]; 569 for (j=0; j<nz; j++) { 570 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 571 } 572 573 /* U part */ 574 pv = b->a + bdiag[i+1]+1; 575 pj = b->j + bdiag[i+1]+1; 576 nz = bdiag[i] - bdiag[i+1]-1; 577 for (j=0; j<nz; j++) { 578 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 579 } 580 581 sctx.rs = rs; 582 sctx.pv = rtmp[i]; 583 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 584 if (sctx.newshift) break; /* break for-loop */ 585 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 586 587 /* Mark diagonal and invert diagonal for simplier triangular solves */ 588 pv = b->a + bdiag[i]; 589 *pv = 1.0/rtmp[i]; 590 591 } /* endof for (i=0; i<n; i++) { */ 592 593 /* MatPivotRefine() */ 594 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 595 /* 596 * if no shift in this attempt & shifting & started shifting & can refine, 597 * then try lower shift 598 */ 599 sctx.shift_hi = sctx.shift_fraction; 600 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 601 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 602 sctx.newshift = PETSC_TRUE; 603 sctx.nshift++; 604 } 605 } while (sctx.newshift); 606 607 ierr = PetscFree(rtmp);CHKERRQ(ierr); 608 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 609 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 610 611 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 612 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 613 if (row_identity && col_identity) { 614 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 615 } else { 616 C->ops->solve = MatSolve_SeqAIJ; 617 } 618 C->ops->solveadd = MatSolveAdd_SeqAIJ; 619 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 620 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 621 C->ops->matsolve = MatMatSolve_SeqAIJ; 622 C->assembled = PETSC_TRUE; 623 C->preallocated = PETSC_TRUE; 624 625 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 626 627 /* MatShiftView(A,info,&sctx) */ 628 if (sctx.nshift) { 629 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 630 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 631 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 632 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 633 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 634 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr); 635 } 636 } 637 ierr = Mat_CheckInode_FactorLU(C,PETSC_FALSE);CHKERRQ(ierr); 638 PetscFunctionReturn(0); 639 } 640 641 #undef __FUNCT__ 642 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace" 643 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 644 { 645 Mat C =B; 646 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 647 IS isrow = b->row,isicol = b->icol; 648 PetscErrorCode ierr; 649 const PetscInt *r,*ic,*ics; 650 PetscInt nz,row,i,j,n=A->rmap->n,diag; 651 const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 652 const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj; 653 MatScalar *pv,*rtmp,*pc,multiplier,d; 654 const MatScalar *v,*aa=a->a; 655 PetscReal rs=0.0; 656 FactorShiftCtx sctx; 657 const PetscInt *ddiag; 658 PetscBool row_identity, col_identity; 659 660 PetscFunctionBegin; 661 /* MatPivotSetUp(): initialize shift context sctx */ 662 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 663 664 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 665 ddiag = a->diag; 666 sctx.shift_top = info->zeropivot; 667 for (i=0; i<n; i++) { 668 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 669 d = (aa)[ddiag[i]]; 670 rs = -PetscAbsScalar(d) - PetscRealPart(d); 671 v = aa+ai[i]; 672 nz = ai[i+1] - ai[i]; 673 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 674 if (rs>sctx.shift_top) sctx.shift_top = rs; 675 } 676 sctx.shift_top *= 1.1; 677 sctx.nshift_max = 5; 678 sctx.shift_lo = 0.; 679 sctx.shift_hi = 1.; 680 } 681 682 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 683 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 684 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 685 ics = ic; 686 687 do { 688 sctx.newshift = PETSC_FALSE; 689 for (i=0; i<n; i++) { 690 nz = bi[i+1] - bi[i]; 691 bjtmp = bj + bi[i]; 692 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 693 694 /* load in initial (unfactored row) */ 695 nz = ai[r[i]+1] - ai[r[i]]; 696 ajtmp = aj + ai[r[i]]; 697 v = aa + ai[r[i]]; 698 for (j=0; j<nz; j++) { 699 rtmp[ics[ajtmp[j]]] = v[j]; 700 } 701 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 702 703 row = *bjtmp++; 704 while (row < i) { 705 pc = rtmp + row; 706 if (*pc != 0.0) { 707 pv = b->a + diag_offset[row]; 708 pj = b->j + diag_offset[row] + 1; 709 multiplier = *pc / *pv++; 710 *pc = multiplier; 711 nz = bi[row+1] - diag_offset[row] - 1; 712 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 713 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 714 } 715 row = *bjtmp++; 716 } 717 /* finished row so stick it into b->a */ 718 pv = b->a + bi[i]; 719 pj = b->j + bi[i]; 720 nz = bi[i+1] - bi[i]; 721 diag = diag_offset[i] - bi[i]; 722 rs = 0.0; 723 for (j=0; j<nz; j++) { 724 pv[j] = rtmp[pj[j]]; 725 rs += PetscAbsScalar(pv[j]); 726 } 727 rs -= PetscAbsScalar(pv[diag]); 728 729 sctx.rs = rs; 730 sctx.pv = pv[diag]; 731 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 732 if (sctx.newshift) break; 733 pv[diag] = sctx.pv; 734 } 735 736 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 737 /* 738 * if no shift in this attempt & shifting & started shifting & can refine, 739 * then try lower shift 740 */ 741 sctx.shift_hi = sctx.shift_fraction; 742 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 743 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 744 sctx.newshift = PETSC_TRUE; 745 sctx.nshift++; 746 } 747 } while (sctx.newshift); 748 749 /* invert diagonal entries for simplier triangular solves */ 750 for (i=0; i<n; i++) { 751 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 752 } 753 ierr = PetscFree(rtmp);CHKERRQ(ierr); 754 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 755 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 756 757 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 758 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 759 if (row_identity && col_identity) { 760 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 761 } else { 762 C->ops->solve = MatSolve_SeqAIJ_inplace; 763 } 764 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 765 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 766 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 767 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 768 769 C->assembled = PETSC_TRUE; 770 C->preallocated = PETSC_TRUE; 771 772 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 773 if (sctx.nshift) { 774 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 775 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 776 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 777 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 778 } 779 } 780 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 781 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 782 783 ierr = Mat_CheckInode(C,PETSC_FALSE);CHKERRQ(ierr); 784 PetscFunctionReturn(0); 785 } 786 787 /* 788 This routine implements inplace ILU(0) with row or/and column permutations. 789 Input: 790 A - original matrix 791 Output; 792 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 793 a->j (col index) is permuted by the inverse of colperm, then sorted 794 a->a reordered accordingly with a->j 795 a->diag (ptr to diagonal elements) is updated. 796 */ 797 #undef __FUNCT__ 798 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 799 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 800 { 801 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data; 802 IS isrow = a->row,isicol = a->icol; 803 PetscErrorCode ierr; 804 const PetscInt *r,*ic,*ics; 805 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 806 PetscInt *ajtmp,nz,row; 807 PetscInt *diag = a->diag,nbdiag,*pj; 808 PetscScalar *rtmp,*pc,multiplier,d; 809 MatScalar *pv,*v; 810 PetscReal rs; 811 FactorShiftCtx sctx; 812 const MatScalar *aa=a->a,*vtmp; 813 814 PetscFunctionBegin; 815 if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 816 817 /* MatPivotSetUp(): initialize shift context sctx */ 818 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 819 820 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 821 const PetscInt *ddiag = a->diag; 822 sctx.shift_top = info->zeropivot; 823 for (i=0; i<n; i++) { 824 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 825 d = (aa)[ddiag[i]]; 826 rs = -PetscAbsScalar(d) - PetscRealPart(d); 827 vtmp = aa+ai[i]; 828 nz = ai[i+1] - ai[i]; 829 for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]); 830 if (rs>sctx.shift_top) sctx.shift_top = rs; 831 } 832 sctx.shift_top *= 1.1; 833 sctx.nshift_max = 5; 834 sctx.shift_lo = 0.; 835 sctx.shift_hi = 1.; 836 } 837 838 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 839 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 840 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 841 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 842 ics = ic; 843 844 #if defined(MV) 845 sctx.shift_top = 0.; 846 sctx.nshift_max = 0; 847 sctx.shift_lo = 0.; 848 sctx.shift_hi = 0.; 849 sctx.shift_fraction = 0.; 850 851 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 852 sctx.shift_top = 0.; 853 for (i=0; i<n; i++) { 854 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 855 d = (a->a)[diag[i]]; 856 rs = -PetscAbsScalar(d) - PetscRealPart(d); 857 v = a->a+ai[i]; 858 nz = ai[i+1] - ai[i]; 859 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 860 if (rs>sctx.shift_top) sctx.shift_top = rs; 861 } 862 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 863 sctx.shift_top *= 1.1; 864 sctx.nshift_max = 5; 865 sctx.shift_lo = 0.; 866 sctx.shift_hi = 1.; 867 } 868 869 sctx.shift_amount = 0.; 870 sctx.nshift = 0; 871 #endif 872 873 do { 874 sctx.newshift = PETSC_FALSE; 875 for (i=0; i<n; i++) { 876 /* load in initial unfactored row */ 877 nz = ai[r[i]+1] - ai[r[i]]; 878 ajtmp = aj + ai[r[i]]; 879 v = a->a + ai[r[i]]; 880 /* sort permuted ajtmp and values v accordingly */ 881 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 882 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 883 884 diag[r[i]] = ai[r[i]]; 885 for (j=0; j<nz; j++) { 886 rtmp[ajtmp[j]] = v[j]; 887 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 888 } 889 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 890 891 row = *ajtmp++; 892 while (row < i) { 893 pc = rtmp + row; 894 if (*pc != 0.0) { 895 pv = a->a + diag[r[row]]; 896 pj = aj + diag[r[row]] + 1; 897 898 multiplier = *pc / *pv++; 899 *pc = multiplier; 900 nz = ai[r[row]+1] - diag[r[row]] - 1; 901 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 902 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 903 } 904 row = *ajtmp++; 905 } 906 /* finished row so overwrite it onto a->a */ 907 pv = a->a + ai[r[i]]; 908 pj = aj + ai[r[i]]; 909 nz = ai[r[i]+1] - ai[r[i]]; 910 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 911 912 rs = 0.0; 913 for (j=0; j<nz; j++) { 914 pv[j] = rtmp[pj[j]]; 915 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 916 } 917 918 sctx.rs = rs; 919 sctx.pv = pv[nbdiag]; 920 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 921 if (sctx.newshift) break; 922 pv[nbdiag] = sctx.pv; 923 } 924 925 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 926 /* 927 * if no shift in this attempt & shifting & started shifting & can refine, 928 * then try lower shift 929 */ 930 sctx.shift_hi = sctx.shift_fraction; 931 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 932 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 933 sctx.newshift = PETSC_TRUE; 934 sctx.nshift++; 935 } 936 } while (sctx.newshift); 937 938 /* invert diagonal entries for simplier triangular solves */ 939 for (i=0; i<n; i++) { 940 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 941 } 942 943 ierr = PetscFree(rtmp);CHKERRQ(ierr); 944 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 945 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 946 947 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 948 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 949 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 950 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 951 952 A->assembled = PETSC_TRUE; 953 A->preallocated = PETSC_TRUE; 954 955 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 956 if (sctx.nshift) { 957 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 958 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 959 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 960 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 961 } 962 } 963 PetscFunctionReturn(0); 964 } 965 966 /* ----------------------------------------------------------- */ 967 #undef __FUNCT__ 968 #define __FUNCT__ "MatLUFactor_SeqAIJ" 969 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 970 { 971 PetscErrorCode ierr; 972 Mat C; 973 974 PetscFunctionBegin; 975 ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 976 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 977 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 978 979 A->ops->solve = C->ops->solve; 980 A->ops->solvetranspose = C->ops->solvetranspose; 981 982 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 983 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 984 PetscFunctionReturn(0); 985 } 986 /* ----------------------------------------------------------- */ 987 988 989 #undef __FUNCT__ 990 #define __FUNCT__ "MatSolve_SeqAIJ_inplace" 991 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 992 { 993 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 994 IS iscol = a->col,isrow = a->row; 995 PetscErrorCode ierr; 996 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 997 PetscInt nz; 998 const PetscInt *rout,*cout,*r,*c; 999 PetscScalar *x,*tmp,*tmps,sum; 1000 const PetscScalar *b; 1001 const MatScalar *aa = a->a,*v; 1002 1003 PetscFunctionBegin; 1004 if (!n) PetscFunctionReturn(0); 1005 1006 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1007 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1008 tmp = a->solve_work; 1009 1010 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1011 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1012 1013 /* forward solve the lower triangular */ 1014 tmp[0] = b[*r++]; 1015 tmps = tmp; 1016 for (i=1; i<n; i++) { 1017 v = aa + ai[i]; 1018 vi = aj + ai[i]; 1019 nz = a->diag[i] - ai[i]; 1020 sum = b[*r++]; 1021 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1022 tmp[i] = sum; 1023 } 1024 1025 /* backward solve the upper triangular */ 1026 for (i=n-1; i>=0; i--) { 1027 v = aa + a->diag[i] + 1; 1028 vi = aj + a->diag[i] + 1; 1029 nz = ai[i+1] - a->diag[i] - 1; 1030 sum = tmp[i]; 1031 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1032 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 1033 } 1034 1035 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1036 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1037 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1038 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1039 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1040 PetscFunctionReturn(0); 1041 } 1042 1043 #undef __FUNCT__ 1044 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace" 1045 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X) 1046 { 1047 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1048 IS iscol = a->col,isrow = a->row; 1049 PetscErrorCode ierr; 1050 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1051 PetscInt nz,neq; 1052 const PetscInt *rout,*cout,*r,*c; 1053 PetscScalar *x,*b,*tmp,*tmps,sum; 1054 const MatScalar *aa = a->a,*v; 1055 PetscBool bisdense,xisdense; 1056 1057 PetscFunctionBegin; 1058 if (!n) PetscFunctionReturn(0); 1059 1060 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1061 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1062 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1063 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1064 1065 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 1066 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1067 1068 tmp = a->solve_work; 1069 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1070 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1071 1072 for (neq=0; neq<B->cmap->n; neq++) { 1073 /* forward solve the lower triangular */ 1074 tmp[0] = b[r[0]]; 1075 tmps = tmp; 1076 for (i=1; i<n; i++) { 1077 v = aa + ai[i]; 1078 vi = aj + ai[i]; 1079 nz = a->diag[i] - ai[i]; 1080 sum = b[r[i]]; 1081 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1082 tmp[i] = sum; 1083 } 1084 /* backward solve the upper triangular */ 1085 for (i=n-1; i>=0; i--) { 1086 v = aa + a->diag[i] + 1; 1087 vi = aj + a->diag[i] + 1; 1088 nz = ai[i+1] - a->diag[i] - 1; 1089 sum = tmp[i]; 1090 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1091 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 1092 } 1093 1094 b += n; 1095 x += n; 1096 } 1097 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1098 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1099 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 1100 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1101 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1102 PetscFunctionReturn(0); 1103 } 1104 1105 #undef __FUNCT__ 1106 #define __FUNCT__ "MatMatSolve_SeqAIJ" 1107 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 1108 { 1109 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1110 IS iscol = a->col,isrow = a->row; 1111 PetscErrorCode ierr; 1112 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1113 PetscInt nz,neq; 1114 const PetscInt *rout,*cout,*r,*c; 1115 PetscScalar *x,*b,*tmp,sum; 1116 const MatScalar *aa = a->a,*v; 1117 PetscBool bisdense,xisdense; 1118 1119 PetscFunctionBegin; 1120 if (!n) PetscFunctionReturn(0); 1121 1122 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1123 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1124 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1125 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1126 1127 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 1128 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1129 1130 tmp = a->solve_work; 1131 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1132 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1133 1134 for (neq=0; neq<B->cmap->n; neq++) { 1135 /* forward solve the lower triangular */ 1136 tmp[0] = b[r[0]]; 1137 v = aa; 1138 vi = aj; 1139 for (i=1; i<n; i++) { 1140 nz = ai[i+1] - ai[i]; 1141 sum = b[r[i]]; 1142 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1143 tmp[i] = sum; 1144 v += nz; vi += nz; 1145 } 1146 1147 /* backward solve the upper triangular */ 1148 for (i=n-1; i>=0; i--) { 1149 v = aa + adiag[i+1]+1; 1150 vi = aj + adiag[i+1]+1; 1151 nz = adiag[i]-adiag[i+1]-1; 1152 sum = tmp[i]; 1153 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1154 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 1155 } 1156 1157 b += n; 1158 x += n; 1159 } 1160 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1161 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1162 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 1163 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1164 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1165 PetscFunctionReturn(0); 1166 } 1167 1168 #undef __FUNCT__ 1169 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 1170 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 1171 { 1172 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1173 IS iscol = a->col,isrow = a->row; 1174 PetscErrorCode ierr; 1175 const PetscInt *r,*c,*rout,*cout; 1176 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1177 PetscInt nz,row; 1178 PetscScalar *x,*b,*tmp,*tmps,sum; 1179 const MatScalar *aa = a->a,*v; 1180 1181 PetscFunctionBegin; 1182 if (!n) PetscFunctionReturn(0); 1183 1184 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1185 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1186 tmp = a->solve_work; 1187 1188 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1189 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1190 1191 /* forward solve the lower triangular */ 1192 tmp[0] = b[*r++]; 1193 tmps = tmp; 1194 for (row=1; row<n; row++) { 1195 i = rout[row]; /* permuted row */ 1196 v = aa + ai[i]; 1197 vi = aj + ai[i]; 1198 nz = a->diag[i] - ai[i]; 1199 sum = b[*r++]; 1200 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1201 tmp[row] = sum; 1202 } 1203 1204 /* backward solve the upper triangular */ 1205 for (row=n-1; row>=0; row--) { 1206 i = rout[row]; /* permuted row */ 1207 v = aa + a->diag[i] + 1; 1208 vi = aj + a->diag[i] + 1; 1209 nz = ai[i+1] - a->diag[i] - 1; 1210 sum = tmp[row]; 1211 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1212 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 1213 } 1214 1215 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1216 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1217 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1218 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1219 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1220 PetscFunctionReturn(0); 1221 } 1222 1223 /* ----------------------------------------------------------- */ 1224 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1225 #undef __FUNCT__ 1226 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace" 1227 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1228 { 1229 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1230 PetscErrorCode ierr; 1231 PetscInt n = A->rmap->n; 1232 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag; 1233 PetscScalar *x; 1234 const PetscScalar *b; 1235 const MatScalar *aa = a->a; 1236 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1237 PetscInt adiag_i,i,nz,ai_i; 1238 const PetscInt *vi; 1239 const MatScalar *v; 1240 PetscScalar sum; 1241 #endif 1242 1243 PetscFunctionBegin; 1244 if (!n) PetscFunctionReturn(0); 1245 1246 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1247 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1248 1249 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1250 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 1251 #else 1252 /* forward solve the lower triangular */ 1253 x[0] = b[0]; 1254 for (i=1; i<n; i++) { 1255 ai_i = ai[i]; 1256 v = aa + ai_i; 1257 vi = aj + ai_i; 1258 nz = adiag[i] - ai_i; 1259 sum = b[i]; 1260 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1261 x[i] = sum; 1262 } 1263 1264 /* backward solve the upper triangular */ 1265 for (i=n-1; i>=0; i--) { 1266 adiag_i = adiag[i]; 1267 v = aa + adiag_i + 1; 1268 vi = aj + adiag_i + 1; 1269 nz = ai[i+1] - adiag_i - 1; 1270 sum = x[i]; 1271 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1272 x[i] = sum*aa[adiag_i]; 1273 } 1274 #endif 1275 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1276 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1277 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1278 PetscFunctionReturn(0); 1279 } 1280 1281 #undef __FUNCT__ 1282 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace" 1283 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx) 1284 { 1285 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1286 IS iscol = a->col,isrow = a->row; 1287 PetscErrorCode ierr; 1288 PetscInt i, n = A->rmap->n,j; 1289 PetscInt nz; 1290 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j; 1291 PetscScalar *x,*tmp,sum; 1292 const PetscScalar *b; 1293 const MatScalar *aa = a->a,*v; 1294 1295 PetscFunctionBegin; 1296 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1297 1298 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1299 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1300 tmp = a->solve_work; 1301 1302 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1303 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1304 1305 /* forward solve the lower triangular */ 1306 tmp[0] = b[*r++]; 1307 for (i=1; i<n; i++) { 1308 v = aa + ai[i]; 1309 vi = aj + ai[i]; 1310 nz = a->diag[i] - ai[i]; 1311 sum = b[*r++]; 1312 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1313 tmp[i] = sum; 1314 } 1315 1316 /* backward solve the upper triangular */ 1317 for (i=n-1; i>=0; i--) { 1318 v = aa + a->diag[i] + 1; 1319 vi = aj + a->diag[i] + 1; 1320 nz = ai[i+1] - a->diag[i] - 1; 1321 sum = tmp[i]; 1322 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1323 tmp[i] = sum*aa[a->diag[i]]; 1324 x[*c--] += tmp[i]; 1325 } 1326 1327 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1328 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1329 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1330 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1331 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1332 PetscFunctionReturn(0); 1333 } 1334 1335 #undef __FUNCT__ 1336 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1337 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1338 { 1339 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1340 IS iscol = a->col,isrow = a->row; 1341 PetscErrorCode ierr; 1342 PetscInt i, n = A->rmap->n,j; 1343 PetscInt nz; 1344 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1345 PetscScalar *x,*tmp,sum; 1346 const PetscScalar *b; 1347 const MatScalar *aa = a->a,*v; 1348 1349 PetscFunctionBegin; 1350 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1351 1352 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1353 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1354 tmp = a->solve_work; 1355 1356 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1357 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1358 1359 /* forward solve the lower triangular */ 1360 tmp[0] = b[r[0]]; 1361 v = aa; 1362 vi = aj; 1363 for (i=1; i<n; i++) { 1364 nz = ai[i+1] - ai[i]; 1365 sum = b[r[i]]; 1366 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1367 tmp[i] = sum; 1368 v += nz; 1369 vi += nz; 1370 } 1371 1372 /* backward solve the upper triangular */ 1373 v = aa + adiag[n-1]; 1374 vi = aj + adiag[n-1]; 1375 for (i=n-1; i>=0; i--) { 1376 nz = adiag[i] - adiag[i+1] - 1; 1377 sum = tmp[i]; 1378 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1379 tmp[i] = sum*v[nz]; 1380 x[c[i]] += tmp[i]; 1381 v += nz+1; vi += nz+1; 1382 } 1383 1384 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1385 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1386 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1387 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1388 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1389 PetscFunctionReturn(0); 1390 } 1391 1392 #undef __FUNCT__ 1393 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace" 1394 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 1395 { 1396 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1397 IS iscol = a->col,isrow = a->row; 1398 PetscErrorCode ierr; 1399 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1400 PetscInt i,n = A->rmap->n,j; 1401 PetscInt nz; 1402 PetscScalar *x,*tmp,s1; 1403 const MatScalar *aa = a->a,*v; 1404 const PetscScalar *b; 1405 1406 PetscFunctionBegin; 1407 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1408 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1409 tmp = a->solve_work; 1410 1411 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1412 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1413 1414 /* copy the b into temp work space according to permutation */ 1415 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1416 1417 /* forward solve the U^T */ 1418 for (i=0; i<n; i++) { 1419 v = aa + diag[i]; 1420 vi = aj + diag[i] + 1; 1421 nz = ai[i+1] - diag[i] - 1; 1422 s1 = tmp[i]; 1423 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1424 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1425 tmp[i] = s1; 1426 } 1427 1428 /* backward solve the L^T */ 1429 for (i=n-1; i>=0; i--) { 1430 v = aa + diag[i] - 1; 1431 vi = aj + diag[i] - 1; 1432 nz = diag[i] - ai[i]; 1433 s1 = tmp[i]; 1434 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1435 } 1436 1437 /* copy tmp into x according to permutation */ 1438 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1439 1440 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1441 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1442 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1443 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1444 1445 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1446 PetscFunctionReturn(0); 1447 } 1448 1449 #undef __FUNCT__ 1450 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1451 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1452 { 1453 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1454 IS iscol = a->col,isrow = a->row; 1455 PetscErrorCode ierr; 1456 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1457 PetscInt i,n = A->rmap->n,j; 1458 PetscInt nz; 1459 PetscScalar *x,*tmp,s1; 1460 const MatScalar *aa = a->a,*v; 1461 const PetscScalar *b; 1462 1463 PetscFunctionBegin; 1464 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1465 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1466 tmp = a->solve_work; 1467 1468 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1469 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1470 1471 /* copy the b into temp work space according to permutation */ 1472 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1473 1474 /* forward solve the U^T */ 1475 for (i=0; i<n; i++) { 1476 v = aa + adiag[i+1] + 1; 1477 vi = aj + adiag[i+1] + 1; 1478 nz = adiag[i] - adiag[i+1] - 1; 1479 s1 = tmp[i]; 1480 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1481 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1482 tmp[i] = s1; 1483 } 1484 1485 /* backward solve the L^T */ 1486 for (i=n-1; i>=0; i--) { 1487 v = aa + ai[i]; 1488 vi = aj + ai[i]; 1489 nz = ai[i+1] - ai[i]; 1490 s1 = tmp[i]; 1491 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1492 } 1493 1494 /* copy tmp into x according to permutation */ 1495 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1496 1497 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1498 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1499 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1500 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1501 1502 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1503 PetscFunctionReturn(0); 1504 } 1505 1506 #undef __FUNCT__ 1507 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace" 1508 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx) 1509 { 1510 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1511 IS iscol = a->col,isrow = a->row; 1512 PetscErrorCode ierr; 1513 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1514 PetscInt i,n = A->rmap->n,j; 1515 PetscInt nz; 1516 PetscScalar *x,*tmp,s1; 1517 const MatScalar *aa = a->a,*v; 1518 const PetscScalar *b; 1519 1520 PetscFunctionBegin; 1521 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1522 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1523 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1524 tmp = a->solve_work; 1525 1526 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1527 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1528 1529 /* copy the b into temp work space according to permutation */ 1530 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1531 1532 /* forward solve the U^T */ 1533 for (i=0; i<n; i++) { 1534 v = aa + diag[i]; 1535 vi = aj + diag[i] + 1; 1536 nz = ai[i+1] - diag[i] - 1; 1537 s1 = tmp[i]; 1538 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1539 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1540 tmp[i] = s1; 1541 } 1542 1543 /* backward solve the L^T */ 1544 for (i=n-1; i>=0; i--) { 1545 v = aa + diag[i] - 1; 1546 vi = aj + diag[i] - 1; 1547 nz = diag[i] - ai[i]; 1548 s1 = tmp[i]; 1549 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1550 } 1551 1552 /* copy tmp into x according to permutation */ 1553 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1554 1555 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1556 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1557 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1558 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1559 1560 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1561 PetscFunctionReturn(0); 1562 } 1563 1564 #undef __FUNCT__ 1565 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1566 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1567 { 1568 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1569 IS iscol = a->col,isrow = a->row; 1570 PetscErrorCode ierr; 1571 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1572 PetscInt i,n = A->rmap->n,j; 1573 PetscInt nz; 1574 PetscScalar *x,*tmp,s1; 1575 const MatScalar *aa = a->a,*v; 1576 const PetscScalar *b; 1577 1578 PetscFunctionBegin; 1579 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1580 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1581 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1582 tmp = a->solve_work; 1583 1584 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1585 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1586 1587 /* copy the b into temp work space according to permutation */ 1588 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1589 1590 /* forward solve the U^T */ 1591 for (i=0; i<n; i++) { 1592 v = aa + adiag[i+1] + 1; 1593 vi = aj + adiag[i+1] + 1; 1594 nz = adiag[i] - adiag[i+1] - 1; 1595 s1 = tmp[i]; 1596 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1597 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1598 tmp[i] = s1; 1599 } 1600 1601 1602 /* backward solve the L^T */ 1603 for (i=n-1; i>=0; i--) { 1604 v = aa + ai[i]; 1605 vi = aj + ai[i]; 1606 nz = ai[i+1] - ai[i]; 1607 s1 = tmp[i]; 1608 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1609 } 1610 1611 /* copy tmp into x according to permutation */ 1612 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1613 1614 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1615 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1616 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1617 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1618 1619 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1620 PetscFunctionReturn(0); 1621 } 1622 1623 /* ----------------------------------------------------------------*/ 1624 1625 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool); 1626 1627 /* 1628 ilu() under revised new data structure. 1629 Factored arrays bj and ba are stored as 1630 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1631 1632 bi=fact->i is an array of size n+1, in which 1633 bi+ 1634 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1635 bi[n]: points to L(n-1,n-1)+1 1636 1637 bdiag=fact->diag is an array of size n+1,in which 1638 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1639 bdiag[n]: points to entry of U(n-1,0)-1 1640 1641 U(i,:) contains bdiag[i] as its last entry, i.e., 1642 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1643 */ 1644 #undef __FUNCT__ 1645 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0" 1646 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1647 { 1648 1649 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1650 PetscErrorCode ierr; 1651 const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1652 PetscInt i,j,k=0,nz,*bi,*bj,*bdiag; 1653 PetscBool missing; 1654 IS isicol; 1655 1656 PetscFunctionBegin; 1657 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1658 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1659 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1660 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1661 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 1662 b = (Mat_SeqAIJ*)(fact)->data; 1663 1664 /* allocate matrix arrays for new data structure */ 1665 ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);CHKERRQ(ierr); 1666 ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1667 1668 b->singlemalloc = PETSC_TRUE; 1669 if (!b->diag) { 1670 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr); 1671 ierr = PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1672 } 1673 bdiag = b->diag; 1674 1675 if (n > 0) { 1676 ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr); 1677 } 1678 1679 /* set bi and bj with new data structure */ 1680 bi = b->i; 1681 bj = b->j; 1682 1683 /* L part */ 1684 bi[0] = 0; 1685 for (i=0; i<n; i++) { 1686 nz = adiag[i] - ai[i]; 1687 bi[i+1] = bi[i] + nz; 1688 aj = a->j + ai[i]; 1689 for (j=0; j<nz; j++) { 1690 /* *bj = aj[j]; bj++; */ 1691 bj[k++] = aj[j]; 1692 } 1693 } 1694 1695 /* U part */ 1696 bdiag[n] = bi[n]-1; 1697 for (i=n-1; i>=0; i--) { 1698 nz = ai[i+1] - adiag[i] - 1; 1699 aj = a->j + adiag[i] + 1; 1700 for (j=0; j<nz; j++) { 1701 /* *bj = aj[j]; bj++; */ 1702 bj[k++] = aj[j]; 1703 } 1704 /* diag[i] */ 1705 /* *bj = i; bj++; */ 1706 bj[k++] = i; 1707 bdiag[i] = bdiag[i+1] + nz + 1; 1708 } 1709 1710 fact->factortype = MAT_FACTOR_ILU; 1711 fact->info.factor_mallocs = 0; 1712 fact->info.fill_ratio_given = info->fill; 1713 fact->info.fill_ratio_needed = 1.0; 1714 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1715 1716 b = (Mat_SeqAIJ*)(fact)->data; 1717 b->row = isrow; 1718 b->col = iscol; 1719 b->icol = isicol; 1720 ierr = PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1721 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1722 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1723 PetscFunctionReturn(0); 1724 } 1725 1726 #undef __FUNCT__ 1727 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1728 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1729 { 1730 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1731 IS isicol; 1732 PetscErrorCode ierr; 1733 const PetscInt *r,*ic; 1734 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1735 PetscInt *bi,*cols,nnz,*cols_lvl; 1736 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1737 PetscInt i,levels,diagonal_fill; 1738 PetscBool col_identity,row_identity; 1739 PetscReal f; 1740 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1741 PetscBT lnkbt; 1742 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1743 PetscFreeSpaceList free_space =PETSC_NULL,current_space=PETSC_NULL; 1744 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1745 1746 PetscFunctionBegin; 1747 /* Uncomment the old data struct part only while testing new data structure for MatSolve() */ 1748 /* 1749 PetscBool olddatastruct=PETSC_FALSE; 1750 ierr = PetscOptionsGetBool(PETSC_NULL,"-ilu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr); 1751 if (olddatastruct) { 1752 ierr = MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1753 PetscFunctionReturn(0); 1754 } 1755 */ 1756 1757 levels = (PetscInt)info->levels; 1758 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1759 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1760 if (!levels && row_identity && col_identity) { 1761 /* special case: ilu(0) with natural ordering */ 1762 ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1763 if (a->inode.size) { 1764 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1765 } 1766 PetscFunctionReturn(0); 1767 } 1768 1769 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1770 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1771 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1772 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1773 1774 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1775 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1776 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1777 bi[0] = bdiag[0] = 0; 1778 ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr); 1779 1780 /* create a linked list for storing column indices of the active row */ 1781 nlnk = n + 1; 1782 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1783 1784 /* initial FreeSpace size is f*(ai[n]+1) */ 1785 f = info->fill; 1786 diagonal_fill = (PetscInt)info->diagonal_fill; 1787 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1788 current_space = free_space; 1789 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1790 current_space_lvl = free_space_lvl; 1791 for (i=0; i<n; i++) { 1792 nzi = 0; 1793 /* copy current row into linked list */ 1794 nnz = ai[r[i]+1] - ai[r[i]]; 1795 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1796 cols = aj + ai[r[i]]; 1797 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1798 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1799 nzi += nlnk; 1800 1801 /* make sure diagonal entry is included */ 1802 if (diagonal_fill && lnk[i] == -1) { 1803 fm = n; 1804 while (lnk[fm] < i) fm = lnk[fm]; 1805 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1806 lnk[fm] = i; 1807 lnk_lvl[i] = 0; 1808 nzi++; dcount++; 1809 } 1810 1811 /* add pivot rows into the active row */ 1812 nzbd = 0; 1813 prow = lnk[n]; 1814 while (prow < i) { 1815 nnz = bdiag[prow]; 1816 cols = bj_ptr[prow] + nnz + 1; 1817 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1818 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1819 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1820 nzi += nlnk; 1821 prow = lnk[prow]; 1822 nzbd++; 1823 } 1824 bdiag[i] = nzbd; 1825 bi[i+1] = bi[i] + nzi; 1826 /* if free space is not available, make more free space */ 1827 if (current_space->local_remaining<nzi) { 1828 nnz = 2*nzi*(n - i); /* estimated and max additional space needed */ 1829 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1830 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1831 reallocs++; 1832 } 1833 1834 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1835 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1836 bj_ptr[i] = current_space->array; 1837 bjlvl_ptr[i] = current_space_lvl->array; 1838 1839 /* make sure the active row i has diagonal entry */ 1840 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1841 1842 current_space->array += nzi; 1843 current_space->local_used += nzi; 1844 current_space->local_remaining -= nzi; 1845 current_space_lvl->array += nzi; 1846 current_space_lvl->local_used += nzi; 1847 current_space_lvl->local_remaining -= nzi; 1848 } 1849 1850 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1851 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1852 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1853 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1854 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 1855 1856 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1857 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1858 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 1859 1860 #if defined(PETSC_USE_INFO) 1861 { 1862 PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1863 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1864 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1865 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1866 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1867 if (diagonal_fill) { 1868 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1869 } 1870 } 1871 #endif 1872 /* put together the new matrix */ 1873 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1874 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 1875 b = (Mat_SeqAIJ*)(fact)->data; 1876 1877 b->free_a = PETSC_TRUE; 1878 b->free_ij = PETSC_TRUE; 1879 b->singlemalloc = PETSC_FALSE; 1880 1881 ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 1882 1883 b->j = bj; 1884 b->i = bi; 1885 b->diag = bdiag; 1886 b->ilen = 0; 1887 b->imax = 0; 1888 b->row = isrow; 1889 b->col = iscol; 1890 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1891 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1892 b->icol = isicol; 1893 1894 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1895 /* In b structure: Free imax, ilen, old a, old j. 1896 Allocate bdiag, solve_work, new a, new j */ 1897 ierr = PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1898 b->maxnz = b->nz = bdiag[0]+1; 1899 1900 (fact)->info.factor_mallocs = reallocs; 1901 (fact)->info.fill_ratio_given = f; 1902 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1903 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1904 if (a->inode.size) { 1905 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1906 } 1907 PetscFunctionReturn(0); 1908 } 1909 1910 #undef __FUNCT__ 1911 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace" 1912 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1913 { 1914 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1915 IS isicol; 1916 PetscErrorCode ierr; 1917 const PetscInt *r,*ic; 1918 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1919 PetscInt *bi,*cols,nnz,*cols_lvl; 1920 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1921 PetscInt i,levels,diagonal_fill; 1922 PetscBool col_identity,row_identity; 1923 PetscReal f; 1924 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1925 PetscBT lnkbt; 1926 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1927 PetscFreeSpaceList free_space =PETSC_NULL,current_space=PETSC_NULL; 1928 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1929 PetscBool missing; 1930 1931 PetscFunctionBegin; 1932 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1933 f = info->fill; 1934 levels = (PetscInt)info->levels; 1935 diagonal_fill = (PetscInt)info->diagonal_fill; 1936 1937 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1938 1939 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1940 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1941 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1942 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 1943 1944 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1945 if (a->inode.size) { 1946 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1947 } 1948 fact->factortype = MAT_FACTOR_ILU; 1949 (fact)->info.factor_mallocs = 0; 1950 (fact)->info.fill_ratio_given = info->fill; 1951 (fact)->info.fill_ratio_needed = 1.0; 1952 1953 b = (Mat_SeqAIJ*)(fact)->data; 1954 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1955 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1956 b->row = isrow; 1957 b->col = iscol; 1958 b->icol = isicol; 1959 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1960 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1961 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1962 PetscFunctionReturn(0); 1963 } 1964 1965 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1966 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1967 1968 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1969 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1970 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1971 bi[0] = bdiag[0] = 0; 1972 1973 ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr); 1974 1975 /* create a linked list for storing column indices of the active row */ 1976 nlnk = n + 1; 1977 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1978 1979 /* initial FreeSpace size is f*(ai[n]+1) */ 1980 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1981 current_space = free_space; 1982 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1983 current_space_lvl = free_space_lvl; 1984 1985 for (i=0; i<n; i++) { 1986 nzi = 0; 1987 /* copy current row into linked list */ 1988 nnz = ai[r[i]+1] - ai[r[i]]; 1989 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1990 cols = aj + ai[r[i]]; 1991 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1992 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1993 nzi += nlnk; 1994 1995 /* make sure diagonal entry is included */ 1996 if (diagonal_fill && lnk[i] == -1) { 1997 fm = n; 1998 while (lnk[fm] < i) fm = lnk[fm]; 1999 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 2000 lnk[fm] = i; 2001 lnk_lvl[i] = 0; 2002 nzi++; dcount++; 2003 } 2004 2005 /* add pivot rows into the active row */ 2006 nzbd = 0; 2007 prow = lnk[n]; 2008 while (prow < i) { 2009 nnz = bdiag[prow]; 2010 cols = bj_ptr[prow] + nnz + 1; 2011 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 2012 nnz = bi[prow+1] - bi[prow] - nnz - 1; 2013 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 2014 nzi += nlnk; 2015 prow = lnk[prow]; 2016 nzbd++; 2017 } 2018 bdiag[i] = nzbd; 2019 bi[i+1] = bi[i] + nzi; 2020 2021 /* if free space is not available, make more free space */ 2022 if (current_space->local_remaining<nzi) { 2023 nnz = nzi*(n - i); /* estimated and max additional space needed */ 2024 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 2025 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 2026 reallocs++; 2027 } 2028 2029 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2030 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2031 bj_ptr[i] = current_space->array; 2032 bjlvl_ptr[i] = current_space_lvl->array; 2033 2034 /* make sure the active row i has diagonal entry */ 2035 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 2036 2037 current_space->array += nzi; 2038 current_space->local_used += nzi; 2039 current_space->local_remaining -= nzi; 2040 current_space_lvl->array += nzi; 2041 current_space_lvl->local_used += nzi; 2042 current_space_lvl->local_remaining -= nzi; 2043 } 2044 2045 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2046 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2047 2048 /* destroy list of free space and other temporary arrays */ 2049 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 2050 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */ 2051 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2052 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2053 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 2054 2055 #if defined(PETSC_USE_INFO) 2056 { 2057 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2058 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 2059 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2060 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 2061 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2062 if (diagonal_fill) { 2063 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 2064 } 2065 } 2066 #endif 2067 2068 /* put together the new matrix */ 2069 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 2070 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 2071 b = (Mat_SeqAIJ*)(fact)->data; 2072 2073 b->free_a = PETSC_TRUE; 2074 b->free_ij = PETSC_TRUE; 2075 b->singlemalloc = PETSC_FALSE; 2076 2077 ierr = PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 2078 b->j = bj; 2079 b->i = bi; 2080 for (i=0; i<n; i++) bdiag[i] += bi[i]; 2081 b->diag = bdiag; 2082 b->ilen = 0; 2083 b->imax = 0; 2084 b->row = isrow; 2085 b->col = iscol; 2086 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2087 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2088 b->icol = isicol; 2089 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2090 /* In b structure: Free imax, ilen, old a, old j. 2091 Allocate bdiag, solve_work, new a, new j */ 2092 ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 2093 b->maxnz = b->nz = bi[n]; 2094 2095 (fact)->info.factor_mallocs = reallocs; 2096 (fact)->info.fill_ratio_given = f; 2097 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2098 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2099 if (a->inode.size) { 2100 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2101 } 2102 PetscFunctionReturn(0); 2103 } 2104 2105 #undef __FUNCT__ 2106 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 2107 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 2108 { 2109 Mat C = B; 2110 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2111 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2112 IS ip=b->row,iip = b->icol; 2113 PetscErrorCode ierr; 2114 const PetscInt *rip,*riip; 2115 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 2116 PetscInt *ai=a->i,*aj=a->j; 2117 PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 2118 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2119 PetscBool perm_identity; 2120 FactorShiftCtx sctx; 2121 PetscReal rs; 2122 MatScalar d,*v; 2123 2124 PetscFunctionBegin; 2125 /* MatPivotSetUp(): initialize shift context sctx */ 2126 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2127 2128 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2129 sctx.shift_top = info->zeropivot; 2130 for (i=0; i<mbs; i++) { 2131 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2132 d = (aa)[a->diag[i]]; 2133 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2134 v = aa+ai[i]; 2135 nz = ai[i+1] - ai[i]; 2136 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 2137 if (rs>sctx.shift_top) sctx.shift_top = rs; 2138 } 2139 sctx.shift_top *= 1.1; 2140 sctx.nshift_max = 5; 2141 sctx.shift_lo = 0.; 2142 sctx.shift_hi = 1.; 2143 } 2144 2145 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2146 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2147 2148 /* allocate working arrays 2149 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2150 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2151 */ 2152 ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);CHKERRQ(ierr); 2153 2154 do { 2155 sctx.newshift = PETSC_FALSE; 2156 2157 for (i=0; i<mbs; i++) c2r[i] = mbs; 2158 if (mbs) il[0] = 0; 2159 2160 for (k = 0; k<mbs; k++) { 2161 /* zero rtmp */ 2162 nz = bi[k+1] - bi[k]; 2163 bjtmp = bj + bi[k]; 2164 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2165 2166 /* load in initial unfactored row */ 2167 bval = ba + bi[k]; 2168 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2169 for (j = jmin; j < jmax; j++) { 2170 col = riip[aj[j]]; 2171 if (col >= k) { /* only take upper triangular entry */ 2172 rtmp[col] = aa[j]; 2173 *bval++ = 0.0; /* for in-place factorization */ 2174 } 2175 } 2176 /* shift the diagonal of the matrix: ZeropivotApply() */ 2177 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2178 2179 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2180 dk = rtmp[k]; 2181 i = c2r[k]; /* first row to be added to k_th row */ 2182 2183 while (i < k) { 2184 nexti = c2r[i]; /* next row to be added to k_th row */ 2185 2186 /* compute multiplier, update diag(k) and U(i,k) */ 2187 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2188 uikdi = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 2189 dk += uikdi*ba[ili]; /* update diag[k] */ 2190 ba[ili] = uikdi; /* -U(i,k) */ 2191 2192 /* add multiple of row i to k-th row */ 2193 jmin = ili + 1; jmax = bi[i+1]; 2194 if (jmin < jmax) { 2195 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2196 /* update il and c2r for row i */ 2197 il[i] = jmin; 2198 j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 2199 } 2200 i = nexti; 2201 } 2202 2203 /* copy data into U(k,:) */ 2204 rs = 0.0; 2205 jmin = bi[k]; jmax = bi[k+1]-1; 2206 if (jmin < jmax) { 2207 for (j=jmin; j<jmax; j++) { 2208 col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]); 2209 } 2210 /* add the k-th row into il and c2r */ 2211 il[k] = jmin; 2212 i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 2213 } 2214 2215 /* MatPivotCheck() */ 2216 sctx.rs = rs; 2217 sctx.pv = dk; 2218 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 2219 if (sctx.newshift) break; 2220 dk = sctx.pv; 2221 2222 ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 2223 } 2224 } while (sctx.newshift); 2225 2226 ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr); 2227 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2228 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2229 2230 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2231 if (perm_identity) { 2232 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2233 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2234 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2235 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2236 } else { 2237 B->ops->solve = MatSolve_SeqSBAIJ_1; 2238 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2239 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2240 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2241 } 2242 2243 C->assembled = PETSC_TRUE; 2244 C->preallocated = PETSC_TRUE; 2245 2246 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2247 2248 /* MatPivotView() */ 2249 if (sctx.nshift) { 2250 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2251 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 2252 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2253 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2254 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 2255 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr); 2256 } 2257 } 2258 PetscFunctionReturn(0); 2259 } 2260 2261 #undef __FUNCT__ 2262 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace" 2263 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 2264 { 2265 Mat C = B; 2266 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2267 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2268 IS ip=b->row,iip = b->icol; 2269 PetscErrorCode ierr; 2270 const PetscInt *rip,*riip; 2271 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp; 2272 PetscInt *ai=a->i,*aj=a->j; 2273 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 2274 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2275 PetscBool perm_identity; 2276 FactorShiftCtx sctx; 2277 PetscReal rs; 2278 MatScalar d,*v; 2279 2280 PetscFunctionBegin; 2281 /* MatPivotSetUp(): initialize shift context sctx */ 2282 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2283 2284 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2285 sctx.shift_top = info->zeropivot; 2286 for (i=0; i<mbs; i++) { 2287 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2288 d = (aa)[a->diag[i]]; 2289 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2290 v = aa+ai[i]; 2291 nz = ai[i+1] - ai[i]; 2292 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 2293 if (rs>sctx.shift_top) sctx.shift_top = rs; 2294 } 2295 sctx.shift_top *= 1.1; 2296 sctx.nshift_max = 5; 2297 sctx.shift_lo = 0.; 2298 sctx.shift_hi = 1.; 2299 } 2300 2301 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2302 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2303 2304 /* initialization */ 2305 ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); 2306 2307 do { 2308 sctx.newshift = PETSC_FALSE; 2309 2310 for (i=0; i<mbs; i++) jl[i] = mbs; 2311 il[0] = 0; 2312 2313 for (k = 0; k<mbs; k++) { 2314 /* zero rtmp */ 2315 nz = bi[k+1] - bi[k]; 2316 bjtmp = bj + bi[k]; 2317 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2318 2319 bval = ba + bi[k]; 2320 /* initialize k-th row by the perm[k]-th row of A */ 2321 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2322 for (j = jmin; j < jmax; j++) { 2323 col = riip[aj[j]]; 2324 if (col >= k) { /* only take upper triangular entry */ 2325 rtmp[col] = aa[j]; 2326 *bval++ = 0.0; /* for in-place factorization */ 2327 } 2328 } 2329 /* shift the diagonal of the matrix */ 2330 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2331 2332 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2333 dk = rtmp[k]; 2334 i = jl[k]; /* first row to be added to k_th row */ 2335 2336 while (i < k) { 2337 nexti = jl[i]; /* next row to be added to k_th row */ 2338 2339 /* compute multiplier, update diag(k) and U(i,k) */ 2340 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2341 uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */ 2342 dk += uikdi*ba[ili]; 2343 ba[ili] = uikdi; /* -U(i,k) */ 2344 2345 /* add multiple of row i to k-th row */ 2346 jmin = ili + 1; jmax = bi[i+1]; 2347 if (jmin < jmax) { 2348 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2349 /* update il and jl for row i */ 2350 il[i] = jmin; 2351 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 2352 } 2353 i = nexti; 2354 } 2355 2356 /* shift the diagonals when zero pivot is detected */ 2357 /* compute rs=sum of abs(off-diagonal) */ 2358 rs = 0.0; 2359 jmin = bi[k]+1; 2360 nz = bi[k+1] - jmin; 2361 bcol = bj + jmin; 2362 for (j=0; j<nz; j++) { 2363 rs += PetscAbsScalar(rtmp[bcol[j]]); 2364 } 2365 2366 sctx.rs = rs; 2367 sctx.pv = dk; 2368 ierr = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr); 2369 if (sctx.newshift) break; 2370 dk = sctx.pv; 2371 2372 /* copy data into U(k,:) */ 2373 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 2374 jmin = bi[k]+1; jmax = bi[k+1]; 2375 if (jmin < jmax) { 2376 for (j=jmin; j<jmax; j++) { 2377 col = bj[j]; ba[j] = rtmp[col]; 2378 } 2379 /* add the k-th row into il and jl */ 2380 il[k] = jmin; 2381 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 2382 } 2383 } 2384 } while (sctx.newshift); 2385 2386 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 2387 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2388 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2389 2390 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2391 if (perm_identity) { 2392 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2393 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2394 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2395 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2396 } else { 2397 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2398 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2399 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2400 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2401 } 2402 2403 C->assembled = PETSC_TRUE; 2404 C->preallocated = PETSC_TRUE; 2405 2406 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2407 if (sctx.nshift) { 2408 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2409 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2410 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2411 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2412 } 2413 } 2414 PetscFunctionReturn(0); 2415 } 2416 2417 /* 2418 icc() under revised new data structure. 2419 Factored arrays bj and ba are stored as 2420 U(0,:),...,U(i,:),U(n-1,:) 2421 2422 ui=fact->i is an array of size n+1, in which 2423 ui+ 2424 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2425 ui[n]: points to U(n-1,n-1)+1 2426 2427 udiag=fact->diag is an array of size n,in which 2428 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2429 2430 U(i,:) contains udiag[i] as its last entry, i.e., 2431 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2432 */ 2433 2434 #undef __FUNCT__ 2435 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 2436 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2437 { 2438 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2439 Mat_SeqSBAIJ *b; 2440 PetscErrorCode ierr; 2441 PetscBool perm_identity,missing; 2442 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2443 const PetscInt *rip,*riip; 2444 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2445 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 2446 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2447 PetscReal fill =info->fill,levels=info->levels; 2448 PetscFreeSpaceList free_space =PETSC_NULL,current_space=PETSC_NULL; 2449 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2450 PetscBT lnkbt; 2451 IS iperm; 2452 2453 PetscFunctionBegin; 2454 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2455 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2456 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2457 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2458 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2459 2460 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2461 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2462 ui[0] = 0; 2463 2464 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2465 if (!levels && perm_identity) { 2466 for (i=0; i<am; i++) { 2467 ncols = ai[i+1] - a->diag[i]; 2468 ui[i+1] = ui[i] + ncols; 2469 udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 2470 } 2471 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2472 cols = uj; 2473 for (i=0; i<am; i++) { 2474 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2475 ncols = ai[i+1] - a->diag[i] -1; 2476 for (j=0; j<ncols; j++) *cols++ = aj[j]; 2477 *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */ 2478 } 2479 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2480 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2481 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2482 2483 /* initialization */ 2484 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 2485 2486 /* jl: linked list for storing indices of the pivot rows 2487 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2488 ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr); 2489 for (i=0; i<am; i++) { 2490 jl[i] = am; il[i] = 0; 2491 } 2492 2493 /* create and initialize a linked list for storing column indices of the active row k */ 2494 nlnk = am + 1; 2495 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2496 2497 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2498 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2499 current_space = free_space; 2500 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr); 2501 current_space_lvl = free_space_lvl; 2502 2503 for (k=0; k<am; k++) { /* for each active row k */ 2504 /* initialize lnk by the column indices of row rip[k] of A */ 2505 nzk = 0; 2506 ncols = ai[rip[k]+1] - ai[rip[k]]; 2507 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2508 ncols_upper = 0; 2509 for (j=0; j<ncols; j++) { 2510 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2511 if (riip[i] >= k) { /* only take upper triangular entry */ 2512 ajtmp[ncols_upper] = i; 2513 ncols_upper++; 2514 } 2515 } 2516 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2517 nzk += nlnk; 2518 2519 /* update lnk by computing fill-in for each pivot row to be merged in */ 2520 prow = jl[k]; /* 1st pivot row */ 2521 2522 while (prow < k) { 2523 nextprow = jl[prow]; 2524 2525 /* merge prow into k-th row */ 2526 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2527 jmax = ui[prow+1]; 2528 ncols = jmax-jmin; 2529 i = jmin - ui[prow]; 2530 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2531 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2532 j = *(uj - 1); 2533 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2534 nzk += nlnk; 2535 2536 /* update il and jl for prow */ 2537 if (jmin < jmax) { 2538 il[prow] = jmin; 2539 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2540 } 2541 prow = nextprow; 2542 } 2543 2544 /* if free space is not available, make more free space */ 2545 if (current_space->local_remaining<nzk) { 2546 i = am - k + 1; /* num of unfactored rows */ 2547 i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2548 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2549 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2550 reallocs++; 2551 } 2552 2553 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2554 if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2555 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2556 2557 /* add the k-th row into il and jl */ 2558 if (nzk > 1) { 2559 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2560 jl[k] = jl[i]; jl[i] = k; 2561 il[k] = ui[k] + 1; 2562 } 2563 uj_ptr[k] = current_space->array; 2564 uj_lvl_ptr[k] = current_space_lvl->array; 2565 2566 current_space->array += nzk; 2567 current_space->local_used += nzk; 2568 current_space->local_remaining -= nzk; 2569 2570 current_space_lvl->array += nzk; 2571 current_space_lvl->local_used += nzk; 2572 current_space_lvl->local_remaining -= nzk; 2573 2574 ui[k+1] = ui[k] + nzk; 2575 } 2576 2577 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2578 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2579 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2580 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2581 2582 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2583 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2584 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2585 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2586 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2587 2588 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2589 2590 /* put together the new matrix in MATSEQSBAIJ format */ 2591 b = (Mat_SeqSBAIJ*)(fact)->data; 2592 b->singlemalloc = PETSC_FALSE; 2593 2594 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2595 2596 b->j = uj; 2597 b->i = ui; 2598 b->diag = udiag; 2599 b->free_diag = PETSC_TRUE; 2600 b->ilen = 0; 2601 b->imax = 0; 2602 b->row = perm; 2603 b->col = perm; 2604 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2605 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2606 b->icol = iperm; 2607 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2608 2609 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2610 ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2611 2612 b->maxnz = b->nz = ui[am]; 2613 b->free_a = PETSC_TRUE; 2614 b->free_ij = PETSC_TRUE; 2615 2616 fact->info.factor_mallocs = reallocs; 2617 fact->info.fill_ratio_given = fill; 2618 if (ai[am] != 0) { 2619 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2620 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2621 } else { 2622 fact->info.fill_ratio_needed = 0.0; 2623 } 2624 #if defined(PETSC_USE_INFO) 2625 if (ai[am] != 0) { 2626 PetscReal af = fact->info.fill_ratio_needed; 2627 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2628 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2629 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2630 } else { 2631 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2632 } 2633 #endif 2634 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2635 PetscFunctionReturn(0); 2636 } 2637 2638 #undef __FUNCT__ 2639 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace" 2640 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2641 { 2642 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2643 Mat_SeqSBAIJ *b; 2644 PetscErrorCode ierr; 2645 PetscBool perm_identity,missing; 2646 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2647 const PetscInt *rip,*riip; 2648 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2649 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 2650 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2651 PetscReal fill =info->fill,levels=info->levels; 2652 PetscFreeSpaceList free_space =PETSC_NULL,current_space=PETSC_NULL; 2653 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2654 PetscBT lnkbt; 2655 IS iperm; 2656 2657 PetscFunctionBegin; 2658 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2659 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2660 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2661 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2662 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2663 2664 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2665 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2666 ui[0] = 0; 2667 2668 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2669 if (!levels && perm_identity) { 2670 2671 for (i=0; i<am; i++) { 2672 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 2673 udiag[i] = ui[i]; 2674 } 2675 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2676 cols = uj; 2677 for (i=0; i<am; i++) { 2678 aj = a->j + a->diag[i]; 2679 ncols = ui[i+1] - ui[i]; 2680 for (j=0; j<ncols; j++) *cols++ = *aj++; 2681 } 2682 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2683 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2684 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2685 2686 /* initialization */ 2687 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 2688 2689 /* jl: linked list for storing indices of the pivot rows 2690 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2691 ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr); 2692 for (i=0; i<am; i++) { 2693 jl[i] = am; il[i] = 0; 2694 } 2695 2696 /* create and initialize a linked list for storing column indices of the active row k */ 2697 nlnk = am + 1; 2698 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2699 2700 /* initial FreeSpace size is fill*(ai[am]+1) */ 2701 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2702 current_space = free_space; 2703 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2704 current_space_lvl = free_space_lvl; 2705 2706 for (k=0; k<am; k++) { /* for each active row k */ 2707 /* initialize lnk by the column indices of row rip[k] of A */ 2708 nzk = 0; 2709 ncols = ai[rip[k]+1] - ai[rip[k]]; 2710 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2711 ncols_upper = 0; 2712 for (j=0; j<ncols; j++) { 2713 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2714 if (riip[i] >= k) { /* only take upper triangular entry */ 2715 ajtmp[ncols_upper] = i; 2716 ncols_upper++; 2717 } 2718 } 2719 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2720 nzk += nlnk; 2721 2722 /* update lnk by computing fill-in for each pivot row to be merged in */ 2723 prow = jl[k]; /* 1st pivot row */ 2724 2725 while (prow < k) { 2726 nextprow = jl[prow]; 2727 2728 /* merge prow into k-th row */ 2729 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2730 jmax = ui[prow+1]; 2731 ncols = jmax-jmin; 2732 i = jmin - ui[prow]; 2733 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2734 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2735 j = *(uj - 1); 2736 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2737 nzk += nlnk; 2738 2739 /* update il and jl for prow */ 2740 if (jmin < jmax) { 2741 il[prow] = jmin; 2742 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2743 } 2744 prow = nextprow; 2745 } 2746 2747 /* if free space is not available, make more free space */ 2748 if (current_space->local_remaining<nzk) { 2749 i = am - k + 1; /* num of unfactored rows */ 2750 i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2751 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2752 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2753 reallocs++; 2754 } 2755 2756 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2757 if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2758 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2759 2760 /* add the k-th row into il and jl */ 2761 if (nzk > 1) { 2762 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2763 jl[k] = jl[i]; jl[i] = k; 2764 il[k] = ui[k] + 1; 2765 } 2766 uj_ptr[k] = current_space->array; 2767 uj_lvl_ptr[k] = current_space_lvl->array; 2768 2769 current_space->array += nzk; 2770 current_space->local_used += nzk; 2771 current_space->local_remaining -= nzk; 2772 2773 current_space_lvl->array += nzk; 2774 current_space_lvl->local_used += nzk; 2775 current_space_lvl->local_remaining -= nzk; 2776 2777 ui[k+1] = ui[k] + nzk; 2778 } 2779 2780 #if defined(PETSC_USE_INFO) 2781 if (ai[am] != 0) { 2782 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2783 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2784 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2785 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2786 } else { 2787 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2788 } 2789 #endif 2790 2791 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2792 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2793 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2794 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2795 2796 /* destroy list of free space and other temporary array(s) */ 2797 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2798 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2799 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2800 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2801 2802 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2803 2804 /* put together the new matrix in MATSEQSBAIJ format */ 2805 2806 b = (Mat_SeqSBAIJ*)fact->data; 2807 b->singlemalloc = PETSC_FALSE; 2808 2809 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2810 2811 b->j = uj; 2812 b->i = ui; 2813 b->diag = udiag; 2814 b->free_diag = PETSC_TRUE; 2815 b->ilen = 0; 2816 b->imax = 0; 2817 b->row = perm; 2818 b->col = perm; 2819 2820 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2821 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2822 2823 b->icol = iperm; 2824 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2825 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2826 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2827 b->maxnz = b->nz = ui[am]; 2828 b->free_a = PETSC_TRUE; 2829 b->free_ij = PETSC_TRUE; 2830 2831 fact->info.factor_mallocs = reallocs; 2832 fact->info.fill_ratio_given = fill; 2833 if (ai[am] != 0) { 2834 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2835 } else { 2836 fact->info.fill_ratio_needed = 0.0; 2837 } 2838 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2839 PetscFunctionReturn(0); 2840 } 2841 2842 #undef __FUNCT__ 2843 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 2844 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2845 { 2846 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2847 Mat_SeqSBAIJ *b; 2848 PetscErrorCode ierr; 2849 PetscBool perm_identity; 2850 PetscReal fill = info->fill; 2851 const PetscInt *rip,*riip; 2852 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2853 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2854 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag; 2855 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2856 PetscBT lnkbt; 2857 IS iperm; 2858 2859 PetscFunctionBegin; 2860 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2861 /* check whether perm is the identity mapping */ 2862 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2863 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2864 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2865 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2866 2867 /* initialization */ 2868 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2869 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2870 ui[0] = 0; 2871 2872 /* jl: linked list for storing indices of the pivot rows 2873 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2874 ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr); 2875 for (i=0; i<am; i++) { 2876 jl[i] = am; il[i] = 0; 2877 } 2878 2879 /* create and initialize a linked list for storing column indices of the active row k */ 2880 nlnk = am + 1; 2881 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2882 2883 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2884 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2885 current_space = free_space; 2886 2887 for (k=0; k<am; k++) { /* for each active row k */ 2888 /* initialize lnk by the column indices of row rip[k] of A */ 2889 nzk = 0; 2890 ncols = ai[rip[k]+1] - ai[rip[k]]; 2891 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2892 ncols_upper = 0; 2893 for (j=0; j<ncols; j++) { 2894 i = riip[*(aj + ai[rip[k]] + j)]; 2895 if (i >= k) { /* only take upper triangular entry */ 2896 cols[ncols_upper] = i; 2897 ncols_upper++; 2898 } 2899 } 2900 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2901 nzk += nlnk; 2902 2903 /* update lnk by computing fill-in for each pivot row to be merged in */ 2904 prow = jl[k]; /* 1st pivot row */ 2905 2906 while (prow < k) { 2907 nextprow = jl[prow]; 2908 /* merge prow into k-th row */ 2909 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2910 jmax = ui[prow+1]; 2911 ncols = jmax-jmin; 2912 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2913 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2914 nzk += nlnk; 2915 2916 /* update il and jl for prow */ 2917 if (jmin < jmax) { 2918 il[prow] = jmin; 2919 j = *uj_ptr; 2920 jl[prow] = jl[j]; 2921 jl[j] = prow; 2922 } 2923 prow = nextprow; 2924 } 2925 2926 /* if free space is not available, make more free space */ 2927 if (current_space->local_remaining<nzk) { 2928 i = am - k + 1; /* num of unfactored rows */ 2929 i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2930 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2931 reallocs++; 2932 } 2933 2934 /* copy data into free space, then initialize lnk */ 2935 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 2936 2937 /* add the k-th row into il and jl */ 2938 if (nzk > 1) { 2939 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2940 jl[k] = jl[i]; jl[i] = k; 2941 il[k] = ui[k] + 1; 2942 } 2943 ui_ptr[k] = current_space->array; 2944 2945 current_space->array += nzk; 2946 current_space->local_used += nzk; 2947 current_space->local_remaining -= nzk; 2948 2949 ui[k+1] = ui[k] + nzk; 2950 } 2951 2952 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2953 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2954 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 2955 2956 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2957 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2958 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2959 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2960 2961 /* put together the new matrix in MATSEQSBAIJ format */ 2962 2963 b = (Mat_SeqSBAIJ*)fact->data; 2964 b->singlemalloc = PETSC_FALSE; 2965 b->free_a = PETSC_TRUE; 2966 b->free_ij = PETSC_TRUE; 2967 2968 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2969 2970 b->j = uj; 2971 b->i = ui; 2972 b->diag = udiag; 2973 b->free_diag = PETSC_TRUE; 2974 b->ilen = 0; 2975 b->imax = 0; 2976 b->row = perm; 2977 b->col = perm; 2978 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2979 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2980 2981 b->icol = iperm; 2982 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2983 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2984 ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2985 2986 b->maxnz = b->nz = ui[am]; 2987 2988 fact->info.factor_mallocs = reallocs; 2989 fact->info.fill_ratio_given = fill; 2990 if (ai[am] != 0) { 2991 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2992 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2993 } else { 2994 fact->info.fill_ratio_needed = 0.0; 2995 } 2996 #if defined(PETSC_USE_INFO) 2997 if (ai[am] != 0) { 2998 PetscReal af = fact->info.fill_ratio_needed; 2999 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 3000 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 3001 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 3002 } else { 3003 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 3004 } 3005 #endif 3006 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 3007 PetscFunctionReturn(0); 3008 } 3009 3010 #undef __FUNCT__ 3011 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace" 3012 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 3013 { 3014 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3015 Mat_SeqSBAIJ *b; 3016 PetscErrorCode ierr; 3017 PetscBool perm_identity; 3018 PetscReal fill = info->fill; 3019 const PetscInt *rip,*riip; 3020 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 3021 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 3022 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 3023 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3024 PetscBT lnkbt; 3025 IS iperm; 3026 3027 PetscFunctionBegin; 3028 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 3029 /* check whether perm is the identity mapping */ 3030 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 3031 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 3032 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 3033 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 3034 3035 /* initialization */ 3036 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 3037 ui[0] = 0; 3038 3039 /* jl: linked list for storing indices of the pivot rows 3040 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 3041 ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr); 3042 for (i=0; i<am; i++) { 3043 jl[i] = am; il[i] = 0; 3044 } 3045 3046 /* create and initialize a linked list for storing column indices of the active row k */ 3047 nlnk = am + 1; 3048 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3049 3050 /* initial FreeSpace size is fill*(ai[am]+1) */ 3051 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 3052 current_space = free_space; 3053 3054 for (k=0; k<am; k++) { /* for each active row k */ 3055 /* initialize lnk by the column indices of row rip[k] of A */ 3056 nzk = 0; 3057 ncols = ai[rip[k]+1] - ai[rip[k]]; 3058 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 3059 ncols_upper = 0; 3060 for (j=0; j<ncols; j++) { 3061 i = riip[*(aj + ai[rip[k]] + j)]; 3062 if (i >= k) { /* only take upper triangular entry */ 3063 cols[ncols_upper] = i; 3064 ncols_upper++; 3065 } 3066 } 3067 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3068 nzk += nlnk; 3069 3070 /* update lnk by computing fill-in for each pivot row to be merged in */ 3071 prow = jl[k]; /* 1st pivot row */ 3072 3073 while (prow < k) { 3074 nextprow = jl[prow]; 3075 /* merge prow into k-th row */ 3076 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3077 jmax = ui[prow+1]; 3078 ncols = jmax-jmin; 3079 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3080 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3081 nzk += nlnk; 3082 3083 /* update il and jl for prow */ 3084 if (jmin < jmax) { 3085 il[prow] = jmin; 3086 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 3087 } 3088 prow = nextprow; 3089 } 3090 3091 /* if free space is not available, make more free space */ 3092 if (current_space->local_remaining<nzk) { 3093 i = am - k + 1; /* num of unfactored rows */ 3094 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3095 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 3096 reallocs++; 3097 } 3098 3099 /* copy data into free space, then initialize lnk */ 3100 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3101 3102 /* add the k-th row into il and jl */ 3103 if (nzk-1 > 0) { 3104 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3105 jl[k] = jl[i]; jl[i] = k; 3106 il[k] = ui[k] + 1; 3107 } 3108 ui_ptr[k] = current_space->array; 3109 3110 current_space->array += nzk; 3111 current_space->local_used += nzk; 3112 current_space->local_remaining -= nzk; 3113 3114 ui[k+1] = ui[k] + nzk; 3115 } 3116 3117 #if defined(PETSC_USE_INFO) 3118 if (ai[am] != 0) { 3119 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 3120 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 3121 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 3122 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 3123 } else { 3124 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 3125 } 3126 #endif 3127 3128 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 3129 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 3130 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 3131 3132 /* destroy list of free space and other temporary array(s) */ 3133 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 3134 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 3135 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3136 3137 /* put together the new matrix in MATSEQSBAIJ format */ 3138 3139 b = (Mat_SeqSBAIJ*)fact->data; 3140 b->singlemalloc = PETSC_FALSE; 3141 b->free_a = PETSC_TRUE; 3142 b->free_ij = PETSC_TRUE; 3143 3144 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 3145 3146 b->j = uj; 3147 b->i = ui; 3148 b->diag = 0; 3149 b->ilen = 0; 3150 b->imax = 0; 3151 b->row = perm; 3152 b->col = perm; 3153 3154 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3155 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3156 3157 b->icol = iperm; 3158 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3159 3160 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3161 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3162 b->maxnz = b->nz = ui[am]; 3163 3164 fact->info.factor_mallocs = reallocs; 3165 fact->info.fill_ratio_given = fill; 3166 if (ai[am] != 0) { 3167 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 3168 } else { 3169 fact->info.fill_ratio_needed = 0.0; 3170 } 3171 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3172 PetscFunctionReturn(0); 3173 } 3174 3175 #undef __FUNCT__ 3176 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 3177 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 3178 { 3179 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3180 PetscErrorCode ierr; 3181 PetscInt n = A->rmap->n; 3182 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 3183 PetscScalar *x,sum; 3184 const PetscScalar *b; 3185 const MatScalar *aa = a->a,*v; 3186 PetscInt i,nz; 3187 3188 PetscFunctionBegin; 3189 if (!n) PetscFunctionReturn(0); 3190 3191 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3192 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3193 3194 /* forward solve the lower triangular */ 3195 x[0] = b[0]; 3196 v = aa; 3197 vi = aj; 3198 for (i=1; i<n; i++) { 3199 nz = ai[i+1] - ai[i]; 3200 sum = b[i]; 3201 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3202 v += nz; 3203 vi += nz; 3204 x[i] = sum; 3205 } 3206 3207 /* backward solve the upper triangular */ 3208 for (i=n-1; i>=0; i--) { 3209 v = aa + adiag[i+1] + 1; 3210 vi = aj + adiag[i+1] + 1; 3211 nz = adiag[i] - adiag[i+1]-1; 3212 sum = x[i]; 3213 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3214 x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3215 } 3216 3217 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 3218 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3219 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3220 PetscFunctionReturn(0); 3221 } 3222 3223 #undef __FUNCT__ 3224 #define __FUNCT__ "MatSolve_SeqAIJ" 3225 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 3226 { 3227 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3228 IS iscol = a->col,isrow = a->row; 3229 PetscErrorCode ierr; 3230 PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz; 3231 const PetscInt *rout,*cout,*r,*c; 3232 PetscScalar *x,*tmp,sum; 3233 const PetscScalar *b; 3234 const MatScalar *aa = a->a,*v; 3235 3236 PetscFunctionBegin; 3237 if (!n) PetscFunctionReturn(0); 3238 3239 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3240 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3241 tmp = a->solve_work; 3242 3243 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 3244 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 3245 3246 /* forward solve the lower triangular */ 3247 tmp[0] = b[r[0]]; 3248 v = aa; 3249 vi = aj; 3250 for (i=1; i<n; i++) { 3251 nz = ai[i+1] - ai[i]; 3252 sum = b[r[i]]; 3253 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3254 tmp[i] = sum; 3255 v += nz; vi += nz; 3256 } 3257 3258 /* backward solve the upper triangular */ 3259 for (i=n-1; i>=0; i--) { 3260 v = aa + adiag[i+1]+1; 3261 vi = aj + adiag[i+1]+1; 3262 nz = adiag[i]-adiag[i+1]-1; 3263 sum = tmp[i]; 3264 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3265 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 3266 } 3267 3268 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 3269 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 3270 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3271 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3272 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 3273 PetscFunctionReturn(0); 3274 } 3275 3276 #undef __FUNCT__ 3277 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 3278 /* 3279 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3280 */ 3281 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 3282 { 3283 Mat B = *fact; 3284 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 3285 IS isicol; 3286 PetscErrorCode ierr; 3287 const PetscInt *r,*ic; 3288 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 3289 PetscInt *bi,*bj,*bdiag,*bdiag_rev; 3290 PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au; 3291 PetscInt nlnk,*lnk; 3292 PetscBT lnkbt; 3293 PetscBool row_identity,icol_identity; 3294 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp; 3295 const PetscInt *ics; 3296 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 3297 PetscReal dt =info->dt,shift=info->shiftamount; 3298 PetscInt dtcount=(PetscInt)info->dtcount,nnz_max; 3299 PetscBool missing; 3300 3301 PetscFunctionBegin; 3302 if (dt == PETSC_DEFAULT) dt = 0.005; 3303 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); 3304 3305 /* ------- symbolic factorization, can be reused ---------*/ 3306 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 3307 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 3308 adiag=a->diag; 3309 3310 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 3311 3312 /* bdiag is location of diagonal in factor */ 3313 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); /* becomes b->diag */ 3314 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev);CHKERRQ(ierr); /* temporary */ 3315 3316 /* allocate row pointers bi */ 3317 ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3318 3319 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3320 if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */ 3321 nnz_max = ai[n]+2*n*dtcount+2; 3322 3323 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3324 ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr); 3325 3326 /* put together the new matrix */ 3327 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 3328 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 3329 b = (Mat_SeqAIJ*)B->data; 3330 3331 b->free_a = PETSC_TRUE; 3332 b->free_ij = PETSC_TRUE; 3333 b->singlemalloc = PETSC_FALSE; 3334 3335 b->a = ba; 3336 b->j = bj; 3337 b->i = bi; 3338 b->diag = bdiag; 3339 b->ilen = 0; 3340 b->imax = 0; 3341 b->row = isrow; 3342 b->col = iscol; 3343 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3344 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3345 b->icol = isicol; 3346 3347 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3348 ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3349 b->maxnz = nnz_max; 3350 3351 B->factortype = MAT_FACTOR_ILUDT; 3352 B->info.factor_mallocs = 0; 3353 B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 3354 CHKMEMQ; 3355 /* ------- end of symbolic factorization ---------*/ 3356 3357 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3358 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3359 ics = ic; 3360 3361 /* linked list for storing column indices of the active row */ 3362 nlnk = n + 1; 3363 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3364 3365 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3366 ierr = PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);CHKERRQ(ierr); 3367 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3368 ierr = PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);CHKERRQ(ierr); 3369 ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr); 3370 3371 bi[0] = 0; 3372 bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */ 3373 bdiag_rev[n] = bdiag[0]; 3374 bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */ 3375 for (i=0; i<n; i++) { 3376 /* copy initial fill into linked list */ 3377 nzi = ai[r[i]+1] - ai[r[i]]; 3378 if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 3379 nzi_al = adiag[r[i]] - ai[r[i]]; 3380 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 3381 ajtmp = aj + ai[r[i]]; 3382 ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3383 3384 /* load in initial (unfactored row) */ 3385 aatmp = a->a + ai[r[i]]; 3386 for (j=0; j<nzi; j++) { 3387 rtmp[ics[*ajtmp++]] = *aatmp++; 3388 } 3389 3390 /* add pivot rows into linked list */ 3391 row = lnk[n]; 3392 while (row < i) { 3393 nzi_bl = bi[row+1] - bi[row] + 1; 3394 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 3395 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 3396 nzi += nlnk; 3397 row = lnk[row]; 3398 } 3399 3400 /* copy data from lnk into jtmp, then initialize lnk */ 3401 ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 3402 3403 /* numerical factorization */ 3404 bjtmp = jtmp; 3405 row = *bjtmp++; /* 1st pivot row */ 3406 while (row < i) { 3407 pc = rtmp + row; 3408 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3409 multiplier = (*pc) * (*pv); 3410 *pc = multiplier; 3411 if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 3412 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3413 pv = ba + bdiag[row+1] + 1; 3414 /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */ 3415 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3416 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3417 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3418 } 3419 row = *bjtmp++; 3420 } 3421 3422 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3423 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3424 nzi_bl = 0; j = 0; 3425 while (jtmp[j] < i) { /* Note: jtmp is sorted */ 3426 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3427 nzi_bl++; j++; 3428 } 3429 nzi_bu = nzi - nzi_bl -1; 3430 while (j < nzi) { 3431 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3432 j++; 3433 } 3434 3435 bjtmp = bj + bi[i]; 3436 batmp = ba + bi[i]; 3437 /* apply level dropping rule to L part */ 3438 ncut = nzi_al + dtcount; 3439 if (ncut < nzi_bl) { 3440 ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 3441 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 3442 } else { 3443 ncut = nzi_bl; 3444 } 3445 for (j=0; j<ncut; j++) { 3446 bjtmp[j] = jtmp[j]; 3447 batmp[j] = vtmp[j]; 3448 /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */ 3449 } 3450 bi[i+1] = bi[i] + ncut; 3451 nzi = ncut + 1; 3452 3453 /* apply level dropping rule to U part */ 3454 ncut = nzi_au + dtcount; 3455 if (ncut < nzi_bu) { 3456 ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 3457 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 3458 } else { 3459 ncut = nzi_bu; 3460 } 3461 nzi += ncut; 3462 3463 /* mark bdiagonal */ 3464 bdiag[i+1] = bdiag[i] - (ncut + 1); 3465 bdiag_rev[n-i-1] = bdiag[i+1]; 3466 bi[2*n - i] = bi[2*n - i +1] - (ncut + 1); 3467 bjtmp = bj + bdiag[i]; 3468 batmp = ba + bdiag[i]; 3469 *bjtmp = i; 3470 *batmp = diag_tmp; /* rtmp[i]; */ 3471 if (*batmp == 0.0) { 3472 *batmp = dt+shift; 3473 /* printf(" row %d add shift %g\n",i,shift); */ 3474 } 3475 *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */ 3476 /* printf(" (%d,%g),",*bjtmp,*batmp); */ 3477 3478 bjtmp = bj + bdiag[i+1]+1; 3479 batmp = ba + bdiag[i+1]+1; 3480 for (k=0; k<ncut; k++) { 3481 bjtmp[k] = jtmp[nzi_bl+1+k]; 3482 batmp[k] = vtmp[nzi_bl+1+k]; 3483 /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */ 3484 } 3485 /* printf("\n"); */ 3486 3487 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3488 /* 3489 printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]); 3490 printf(" ----------------------------\n"); 3491 */ 3492 } /* for (i=0; i<n; i++) */ 3493 /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */ 3494 if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 3495 3496 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3497 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3498 3499 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3500 ierr = PetscFree2(im,jtmp);CHKERRQ(ierr); 3501 ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr); 3502 ierr = PetscFree(bdiag_rev);CHKERRQ(ierr); 3503 3504 ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 3505 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3506 3507 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3508 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 3509 if (row_identity && icol_identity) { 3510 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3511 } else { 3512 B->ops->solve = MatSolve_SeqAIJ; 3513 } 3514 3515 B->ops->solveadd = 0; 3516 B->ops->solvetranspose = 0; 3517 B->ops->solvetransposeadd = 0; 3518 B->ops->matsolve = 0; 3519 B->assembled = PETSC_TRUE; 3520 B->preallocated = PETSC_TRUE; 3521 PetscFunctionReturn(0); 3522 } 3523 3524 /* a wraper of MatILUDTFactor_SeqAIJ() */ 3525 #undef __FUNCT__ 3526 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ" 3527 /* 3528 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3529 */ 3530 3531 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 3532 { 3533 PetscErrorCode ierr; 3534 3535 PetscFunctionBegin; 3536 ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr); 3537 PetscFunctionReturn(0); 3538 } 3539 3540 /* 3541 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3542 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3543 */ 3544 #undef __FUNCT__ 3545 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ" 3546 /* 3547 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3548 */ 3549 3550 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 3551 { 3552 Mat C =fact; 3553 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 3554 IS isrow = b->row,isicol = b->icol; 3555 PetscErrorCode ierr; 3556 const PetscInt *r,*ic,*ics; 3557 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 3558 PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 3559 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 3560 PetscReal dt=info->dt,shift=info->shiftamount; 3561 PetscBool row_identity, col_identity; 3562 3563 PetscFunctionBegin; 3564 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3565 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3566 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 3567 ics = ic; 3568 3569 for (i=0; i<n; i++) { 3570 /* initialize rtmp array */ 3571 nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 3572 bjtmp = bj + bi[i]; 3573 for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 3574 rtmp[i] = 0.0; 3575 nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 3576 bjtmp = bj + bdiag[i+1] + 1; 3577 for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 3578 3579 /* load in initial unfactored row of A */ 3580 /* printf("row %d\n",i); */ 3581 nz = ai[r[i]+1] - ai[r[i]]; 3582 ajtmp = aj + ai[r[i]]; 3583 v = aa + ai[r[i]]; 3584 for (j=0; j<nz; j++) { 3585 rtmp[ics[*ajtmp++]] = v[j]; 3586 /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */ 3587 } 3588 /* printf("\n"); */ 3589 3590 /* numerical factorization */ 3591 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3592 nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 3593 k = 0; 3594 while (k < nzl) { 3595 row = *bjtmp++; 3596 /* printf(" prow %d\n",row); */ 3597 pc = rtmp + row; 3598 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3599 multiplier = (*pc) * (*pv); 3600 *pc = multiplier; 3601 if (PetscAbsScalar(multiplier) > dt) { 3602 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3603 pv = b->a + bdiag[row+1] + 1; 3604 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3605 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3606 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3607 } 3608 k++; 3609 } 3610 3611 /* finished row so stick it into b->a */ 3612 /* L-part */ 3613 pv = b->a + bi[i]; 3614 pj = bj + bi[i]; 3615 nzl = bi[i+1] - bi[i]; 3616 for (j=0; j<nzl; j++) { 3617 pv[j] = rtmp[pj[j]]; 3618 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3619 } 3620 3621 /* diagonal: invert diagonal entries for simplier triangular solves */ 3622 if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 3623 b->a[bdiag[i]] = 1.0/rtmp[i]; 3624 /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */ 3625 3626 /* U-part */ 3627 pv = b->a + bdiag[i+1] + 1; 3628 pj = bj + bdiag[i+1] + 1; 3629 nzu = bdiag[i] - bdiag[i+1] - 1; 3630 for (j=0; j<nzu; j++) { 3631 pv[j] = rtmp[pj[j]]; 3632 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3633 } 3634 /* printf("\n"); */ 3635 } 3636 3637 ierr = PetscFree(rtmp);CHKERRQ(ierr); 3638 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3639 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3640 3641 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3642 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 3643 if (row_identity && col_identity) { 3644 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3645 } else { 3646 C->ops->solve = MatSolve_SeqAIJ; 3647 } 3648 C->ops->solveadd = 0; 3649 C->ops->solvetranspose = 0; 3650 C->ops->solvetransposeadd = 0; 3651 C->ops->matsolve = 0; 3652 C->assembled = PETSC_TRUE; 3653 C->preallocated = PETSC_TRUE; 3654 3655 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 3656 PetscFunctionReturn(0); 3657 } 3658