1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscbt.h> 5 #include <../src/mat/utils/freespace.h> 6 7 /* 8 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 9 10 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 11 */ 12 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 13 { 14 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data; 15 PetscErrorCode ierr; 16 PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order; 17 const PetscInt *ai = a->i, *aj = a->j; 18 const PetscScalar *aa = a->a; 19 PetscBool *done; 20 PetscReal best,past = 0,future; 21 22 PetscFunctionBegin; 23 /* pick initial row */ 24 best = -1; 25 for (i=0; i<n; i++) { 26 future = 0.0; 27 for (j=ai[i]; j<ai[i+1]; j++) { 28 if (aj[j] != i) future += PetscAbsScalar(aa[j]); 29 else past = PetscAbsScalar(aa[j]); 30 } 31 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 32 if (past/future > best) { 33 best = past/future; 34 current = i; 35 } 36 } 37 38 ierr = PetscMalloc1(n,&done);CHKERRQ(ierr); 39 ierr = PetscArrayzero(done,n);CHKERRQ(ierr); 40 ierr = PetscMalloc1(n,&order);CHKERRQ(ierr); 41 order[0] = current; 42 for (i=0; i<n-1; i++) { 43 done[current] = PETSC_TRUE; 44 best = -1; 45 /* loop over all neighbors of current pivot */ 46 for (j=ai[current]; j<ai[current+1]; j++) { 47 jj = aj[j]; 48 if (done[jj]) continue; 49 /* loop over columns of potential next row computing weights for below and above diagonal */ 50 past = future = 0.0; 51 for (k=ai[jj]; k<ai[jj+1]; k++) { 52 kk = aj[k]; 53 if (done[kk]) past += PetscAbsScalar(aa[k]); 54 else if (kk != jj) future += PetscAbsScalar(aa[k]); 55 } 56 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 57 if (past/future > best) { 58 best = past/future; 59 newcurrent = jj; 60 } 61 } 62 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 63 best = -1; 64 for (k=0; k<n; k++) { 65 if (done[k]) continue; 66 future = 0.0; 67 past = 0.0; 68 for (j=ai[k]; j<ai[k+1]; j++) { 69 kk = aj[j]; 70 if (done[kk]) past += PetscAbsScalar(aa[j]); 71 else if (kk != k) future += PetscAbsScalar(aa[j]); 72 } 73 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 74 if (past/future > best) { 75 best = past/future; 76 newcurrent = k; 77 } 78 } 79 } 80 if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current"); 81 current = newcurrent; 82 order[i+1] = current; 83 } 84 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr); 85 *icol = *irow; 86 ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr); 87 ierr = PetscFree(done);CHKERRQ(ierr); 88 ierr = PetscFree(order);CHKERRQ(ierr); 89 PetscFunctionReturn(0); 90 } 91 92 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 93 { 94 PetscInt n = A->rmap->n; 95 PetscErrorCode ierr; 96 97 PetscFunctionBegin; 98 #if defined(PETSC_USE_COMPLEX) 99 if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported"); 100 #endif 101 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 102 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 103 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 104 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 105 106 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 107 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 108 109 ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr); 110 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 111 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 112 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 113 114 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 115 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 116 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 117 (*B)->factortype = ftype; 118 119 ierr = PetscFree((*B)->solvertype);CHKERRQ(ierr); 120 ierr = PetscStrallocpy(MATSOLVERPETSC,&(*B)->solvertype);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 125 { 126 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 127 IS isicol; 128 PetscErrorCode ierr; 129 const PetscInt *r,*ic; 130 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 131 PetscInt *bi,*bj,*ajtmp; 132 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 133 PetscReal f; 134 PetscInt nlnk,*lnk,k,**bi_ptr; 135 PetscFreeSpaceList free_space=NULL,current_space=NULL; 136 PetscBT lnkbt; 137 PetscBool missing; 138 139 PetscFunctionBegin; 140 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 141 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 142 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 143 144 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 145 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 146 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 147 148 /* get new row pointers */ 149 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 150 bi[0] = 0; 151 152 /* bdiag is location of diagonal in factor */ 153 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 154 bdiag[0] = 0; 155 156 /* linked list for storing column indices of the active row */ 157 nlnk = n + 1; 158 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 159 160 ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr); 161 162 /* initial FreeSpace size is f*(ai[n]+1) */ 163 f = info->fill; 164 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); 165 current_space = free_space; 166 167 for (i=0; i<n; i++) { 168 /* copy previous fill into linked list */ 169 nzi = 0; 170 nnz = ai[r[i]+1] - ai[r[i]]; 171 ajtmp = aj + ai[r[i]]; 172 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 173 nzi += nlnk; 174 175 /* add pivot rows into linked list */ 176 row = lnk[n]; 177 while (row < i) { 178 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 179 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 180 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 181 nzi += nlnk; 182 row = lnk[row]; 183 } 184 bi[i+1] = bi[i] + nzi; 185 im[i] = nzi; 186 187 /* mark bdiag */ 188 nzbd = 0; 189 nnz = nzi; 190 k = lnk[n]; 191 while (nnz-- && k < i) { 192 nzbd++; 193 k = lnk[k]; 194 } 195 bdiag[i] = bi[i] + nzbd; 196 197 /* if free space is not available, make more free space */ 198 if (current_space->local_remaining<nzi) { 199 nnz = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */ 200 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 201 reallocs++; 202 } 203 204 /* copy data into free space, then initialize lnk */ 205 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 206 207 bi_ptr[i] = current_space->array; 208 current_space->array += nzi; 209 current_space->local_used += nzi; 210 current_space->local_remaining -= nzi; 211 } 212 #if defined(PETSC_USE_INFO) 213 if (ai[n] != 0) { 214 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 215 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 216 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 217 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); 218 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 219 } else { 220 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 221 } 222 #endif 223 224 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 225 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 226 227 /* destroy list of free space and other temporary array(s) */ 228 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 229 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 230 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 231 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 232 233 /* put together the new matrix */ 234 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 235 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 236 b = (Mat_SeqAIJ*)(B)->data; 237 238 b->free_a = PETSC_TRUE; 239 b->free_ij = PETSC_TRUE; 240 b->singlemalloc = PETSC_FALSE; 241 242 ierr = PetscMalloc1(bi[n]+1,&b->a);CHKERRQ(ierr); 243 b->j = bj; 244 b->i = bi; 245 b->diag = bdiag; 246 b->ilen = 0; 247 b->imax = 0; 248 b->row = isrow; 249 b->col = iscol; 250 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 251 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 252 b->icol = isicol; 253 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 254 255 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 256 ierr = PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 257 b->maxnz = b->nz = bi[n]; 258 259 (B)->factortype = MAT_FACTOR_LU; 260 (B)->info.factor_mallocs = reallocs; 261 (B)->info.fill_ratio_given = f; 262 263 if (ai[n]) { 264 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 265 } else { 266 (B)->info.fill_ratio_needed = 0.0; 267 } 268 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 269 if (a->inode.size) { 270 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 271 } 272 PetscFunctionReturn(0); 273 } 274 275 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 276 { 277 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 278 IS isicol; 279 PetscErrorCode ierr; 280 const PetscInt *r,*ic,*ai=a->i,*aj=a->j,*ajtmp; 281 PetscInt i,n=A->rmap->n; 282 PetscInt *bi,*bj; 283 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 284 PetscReal f; 285 PetscInt nlnk,*lnk,k,**bi_ptr; 286 PetscFreeSpaceList free_space=NULL,current_space=NULL; 287 PetscBT lnkbt; 288 PetscBool missing; 289 290 PetscFunctionBegin; 291 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 292 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 293 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 294 295 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 296 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 297 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 298 299 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 300 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 301 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 302 bi[0] = bdiag[0] = 0; 303 304 /* linked list for storing column indices of the active row */ 305 nlnk = n + 1; 306 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 307 308 ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr); 309 310 /* initial FreeSpace size is f*(ai[n]+1) */ 311 f = info->fill; 312 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); 313 current_space = free_space; 314 315 for (i=0; i<n; i++) { 316 /* copy previous fill into linked list */ 317 nzi = 0; 318 nnz = ai[r[i]+1] - ai[r[i]]; 319 ajtmp = aj + ai[r[i]]; 320 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 321 nzi += nlnk; 322 323 /* add pivot rows into linked list */ 324 row = lnk[n]; 325 while (row < i) { 326 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 327 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 328 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 329 nzi += nlnk; 330 row = lnk[row]; 331 } 332 bi[i+1] = bi[i] + nzi; 333 im[i] = nzi; 334 335 /* mark bdiag */ 336 nzbd = 0; 337 nnz = nzi; 338 k = lnk[n]; 339 while (nnz-- && k < i) { 340 nzbd++; 341 k = lnk[k]; 342 } 343 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 344 345 /* if free space is not available, make more free space */ 346 if (current_space->local_remaining<nzi) { 347 /* estimated additional space needed */ 348 nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(n-1,nzi)); 349 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 350 reallocs++; 351 } 352 353 /* copy data into free space, then initialize lnk */ 354 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 355 356 bi_ptr[i] = current_space->array; 357 current_space->array += nzi; 358 current_space->local_used += nzi; 359 current_space->local_remaining -= nzi; 360 } 361 362 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 363 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 364 365 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 366 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 367 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 368 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 369 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 370 371 /* put together the new matrix */ 372 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 373 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 374 b = (Mat_SeqAIJ*)(B)->data; 375 376 b->free_a = PETSC_TRUE; 377 b->free_ij = PETSC_TRUE; 378 b->singlemalloc = PETSC_FALSE; 379 380 ierr = PetscMalloc1(bdiag[0]+1,&b->a);CHKERRQ(ierr); 381 382 b->j = bj; 383 b->i = bi; 384 b->diag = bdiag; 385 b->ilen = 0; 386 b->imax = 0; 387 b->row = isrow; 388 b->col = iscol; 389 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 390 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 391 b->icol = isicol; 392 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 393 394 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 395 ierr = PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 396 b->maxnz = b->nz = bdiag[0]+1; 397 398 B->factortype = MAT_FACTOR_LU; 399 B->info.factor_mallocs = reallocs; 400 B->info.fill_ratio_given = f; 401 402 if (ai[n]) { 403 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 404 } else { 405 B->info.fill_ratio_needed = 0.0; 406 } 407 #if defined(PETSC_USE_INFO) 408 if (ai[n] != 0) { 409 PetscReal af = B->info.fill_ratio_needed; 410 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 411 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 412 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); 413 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 414 } else { 415 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 416 } 417 #endif 418 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 419 if (a->inode.size) { 420 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 421 } 422 ierr = MatSeqAIJCheckInode_FactorLU(B);CHKERRQ(ierr); 423 PetscFunctionReturn(0); 424 } 425 426 /* 427 Trouble in factorization, should we dump the original matrix? 428 */ 429 PetscErrorCode MatFactorDumpMatrix(Mat A) 430 { 431 PetscErrorCode ierr; 432 PetscBool flg = PETSC_FALSE; 433 434 PetscFunctionBegin; 435 ierr = PetscOptionsGetBool(((PetscObject)A)->options,NULL,"-mat_factor_dump_on_error",&flg,NULL);CHKERRQ(ierr); 436 if (flg) { 437 PetscViewer viewer; 438 char filename[PETSC_MAX_PATH_LEN]; 439 440 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 441 ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 442 ierr = MatView(A,viewer);CHKERRQ(ierr); 443 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 444 } 445 PetscFunctionReturn(0); 446 } 447 448 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 449 { 450 Mat C =B; 451 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 452 IS isrow = b->row,isicol = b->icol; 453 PetscErrorCode ierr; 454 const PetscInt *r,*ic,*ics; 455 const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag; 456 PetscInt i,j,k,nz,nzL,row,*pj; 457 const PetscInt *ajtmp,*bjtmp; 458 MatScalar *rtmp,*pc,multiplier,*pv; 459 const MatScalar *aa=a->a,*v; 460 PetscBool row_identity,col_identity; 461 FactorShiftCtx sctx; 462 const PetscInt *ddiag; 463 PetscReal rs; 464 MatScalar d; 465 466 PetscFunctionBegin; 467 /* MatPivotSetUp(): initialize shift context sctx */ 468 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 469 470 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 471 ddiag = a->diag; 472 sctx.shift_top = info->zeropivot; 473 for (i=0; i<n; i++) { 474 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 475 d = (aa)[ddiag[i]]; 476 rs = -PetscAbsScalar(d) - PetscRealPart(d); 477 v = aa+ai[i]; 478 nz = ai[i+1] - ai[i]; 479 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 480 if (rs>sctx.shift_top) sctx.shift_top = rs; 481 } 482 sctx.shift_top *= 1.1; 483 sctx.nshift_max = 5; 484 sctx.shift_lo = 0.; 485 sctx.shift_hi = 1.; 486 } 487 488 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 489 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 490 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 491 ics = ic; 492 493 do { 494 sctx.newshift = PETSC_FALSE; 495 for (i=0; i<n; i++) { 496 /* zero rtmp */ 497 /* L part */ 498 nz = bi[i+1] - bi[i]; 499 bjtmp = bj + bi[i]; 500 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 501 502 /* U part */ 503 nz = bdiag[i]-bdiag[i+1]; 504 bjtmp = bj + bdiag[i+1]+1; 505 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 506 507 /* load in initial (unfactored row) */ 508 nz = ai[r[i]+1] - ai[r[i]]; 509 ajtmp = aj + ai[r[i]]; 510 v = aa + ai[r[i]]; 511 for (j=0; j<nz; j++) { 512 rtmp[ics[ajtmp[j]]] = v[j]; 513 } 514 /* ZeropivotApply() */ 515 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 516 517 /* elimination */ 518 bjtmp = bj + bi[i]; 519 row = *bjtmp++; 520 nzL = bi[i+1] - bi[i]; 521 for (k=0; k < nzL; k++) { 522 pc = rtmp + row; 523 if (*pc != 0.0) { 524 pv = b->a + bdiag[row]; 525 multiplier = *pc * (*pv); 526 *pc = multiplier; 527 528 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 529 pv = b->a + bdiag[row+1]+1; 530 nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */ 531 532 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 533 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 534 } 535 row = *bjtmp++; 536 } 537 538 /* finished row so stick it into b->a */ 539 rs = 0.0; 540 /* L part */ 541 pv = b->a + bi[i]; 542 pj = b->j + bi[i]; 543 nz = bi[i+1] - bi[i]; 544 for (j=0; j<nz; j++) { 545 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 546 } 547 548 /* U part */ 549 pv = b->a + bdiag[i+1]+1; 550 pj = b->j + bdiag[i+1]+1; 551 nz = bdiag[i] - bdiag[i+1]-1; 552 for (j=0; j<nz; j++) { 553 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 554 } 555 556 sctx.rs = rs; 557 sctx.pv = rtmp[i]; 558 ierr = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr); 559 if (sctx.newshift) break; /* break for-loop */ 560 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 561 562 /* Mark diagonal and invert diagonal for simplier triangular solves */ 563 pv = b->a + bdiag[i]; 564 *pv = 1.0/rtmp[i]; 565 566 } /* endof for (i=0; i<n; i++) { */ 567 568 /* MatPivotRefine() */ 569 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 570 /* 571 * if no shift in this attempt & shifting & started shifting & can refine, 572 * then try lower shift 573 */ 574 sctx.shift_hi = sctx.shift_fraction; 575 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 576 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 577 sctx.newshift = PETSC_TRUE; 578 sctx.nshift++; 579 } 580 } while (sctx.newshift); 581 582 ierr = PetscFree(rtmp);CHKERRQ(ierr); 583 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 584 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 585 586 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 587 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 588 if (b->inode.size) { 589 C->ops->solve = MatSolve_SeqAIJ_Inode; 590 } else if (row_identity && col_identity) { 591 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 592 } else { 593 C->ops->solve = MatSolve_SeqAIJ; 594 } 595 C->ops->solveadd = MatSolveAdd_SeqAIJ; 596 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 597 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 598 C->ops->matsolve = MatMatSolve_SeqAIJ; 599 C->assembled = PETSC_TRUE; 600 C->preallocated = PETSC_TRUE; 601 602 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 603 604 /* MatShiftView(A,info,&sctx) */ 605 if (sctx.nshift) { 606 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 607 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 608 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 609 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 610 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 611 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr); 612 } 613 } 614 PetscFunctionReturn(0); 615 } 616 617 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 618 { 619 Mat C =B; 620 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 621 IS isrow = b->row,isicol = b->icol; 622 PetscErrorCode ierr; 623 const PetscInt *r,*ic,*ics; 624 PetscInt nz,row,i,j,n=A->rmap->n,diag; 625 const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 626 const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj; 627 MatScalar *pv,*rtmp,*pc,multiplier,d; 628 const MatScalar *v,*aa=a->a; 629 PetscReal rs=0.0; 630 FactorShiftCtx sctx; 631 const PetscInt *ddiag; 632 PetscBool row_identity, col_identity; 633 634 PetscFunctionBegin; 635 /* MatPivotSetUp(): initialize shift context sctx */ 636 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 637 638 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 639 ddiag = a->diag; 640 sctx.shift_top = info->zeropivot; 641 for (i=0; i<n; i++) { 642 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 643 d = (aa)[ddiag[i]]; 644 rs = -PetscAbsScalar(d) - PetscRealPart(d); 645 v = aa+ai[i]; 646 nz = ai[i+1] - ai[i]; 647 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 648 if (rs>sctx.shift_top) sctx.shift_top = rs; 649 } 650 sctx.shift_top *= 1.1; 651 sctx.nshift_max = 5; 652 sctx.shift_lo = 0.; 653 sctx.shift_hi = 1.; 654 } 655 656 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 657 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 658 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 659 ics = ic; 660 661 do { 662 sctx.newshift = PETSC_FALSE; 663 for (i=0; i<n; i++) { 664 nz = bi[i+1] - bi[i]; 665 bjtmp = bj + bi[i]; 666 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 667 668 /* load in initial (unfactored row) */ 669 nz = ai[r[i]+1] - ai[r[i]]; 670 ajtmp = aj + ai[r[i]]; 671 v = aa + ai[r[i]]; 672 for (j=0; j<nz; j++) { 673 rtmp[ics[ajtmp[j]]] = v[j]; 674 } 675 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 676 677 row = *bjtmp++; 678 while (row < i) { 679 pc = rtmp + row; 680 if (*pc != 0.0) { 681 pv = b->a + diag_offset[row]; 682 pj = b->j + diag_offset[row] + 1; 683 multiplier = *pc / *pv++; 684 *pc = multiplier; 685 nz = bi[row+1] - diag_offset[row] - 1; 686 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 687 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 688 } 689 row = *bjtmp++; 690 } 691 /* finished row so stick it into b->a */ 692 pv = b->a + bi[i]; 693 pj = b->j + bi[i]; 694 nz = bi[i+1] - bi[i]; 695 diag = diag_offset[i] - bi[i]; 696 rs = 0.0; 697 for (j=0; j<nz; j++) { 698 pv[j] = rtmp[pj[j]]; 699 rs += PetscAbsScalar(pv[j]); 700 } 701 rs -= PetscAbsScalar(pv[diag]); 702 703 sctx.rs = rs; 704 sctx.pv = pv[diag]; 705 ierr = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr); 706 if (sctx.newshift) break; 707 pv[diag] = sctx.pv; 708 } 709 710 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 711 /* 712 * if no shift in this attempt & shifting & started shifting & can refine, 713 * then try lower shift 714 */ 715 sctx.shift_hi = sctx.shift_fraction; 716 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 717 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 718 sctx.newshift = PETSC_TRUE; 719 sctx.nshift++; 720 } 721 } while (sctx.newshift); 722 723 /* invert diagonal entries for simplier triangular solves */ 724 for (i=0; i<n; i++) { 725 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 726 } 727 ierr = PetscFree(rtmp);CHKERRQ(ierr); 728 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 729 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 730 731 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 732 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 733 if (row_identity && col_identity) { 734 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 735 } else { 736 C->ops->solve = MatSolve_SeqAIJ_inplace; 737 } 738 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 739 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 740 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 741 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 742 743 C->assembled = PETSC_TRUE; 744 C->preallocated = PETSC_TRUE; 745 746 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 747 if (sctx.nshift) { 748 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 749 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 750 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 751 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 752 } 753 } 754 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 755 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 756 757 ierr = MatSeqAIJCheckInode(C);CHKERRQ(ierr); 758 PetscFunctionReturn(0); 759 } 760 761 /* 762 This routine implements inplace ILU(0) with row or/and column permutations. 763 Input: 764 A - original matrix 765 Output; 766 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 767 a->j (col index) is permuted by the inverse of colperm, then sorted 768 a->a reordered accordingly with a->j 769 a->diag (ptr to diagonal elements) is updated. 770 */ 771 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 772 { 773 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data; 774 IS isrow = a->row,isicol = a->icol; 775 PetscErrorCode ierr; 776 const PetscInt *r,*ic,*ics; 777 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 778 PetscInt *ajtmp,nz,row; 779 PetscInt *diag = a->diag,nbdiag,*pj; 780 PetscScalar *rtmp,*pc,multiplier,d; 781 MatScalar *pv,*v; 782 PetscReal rs; 783 FactorShiftCtx sctx; 784 const MatScalar *aa=a->a,*vtmp; 785 786 PetscFunctionBegin; 787 if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 788 789 /* MatPivotSetUp(): initialize shift context sctx */ 790 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 791 792 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 793 const PetscInt *ddiag = a->diag; 794 sctx.shift_top = info->zeropivot; 795 for (i=0; i<n; i++) { 796 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 797 d = (aa)[ddiag[i]]; 798 rs = -PetscAbsScalar(d) - PetscRealPart(d); 799 vtmp = aa+ai[i]; 800 nz = ai[i+1] - ai[i]; 801 for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]); 802 if (rs>sctx.shift_top) sctx.shift_top = rs; 803 } 804 sctx.shift_top *= 1.1; 805 sctx.nshift_max = 5; 806 sctx.shift_lo = 0.; 807 sctx.shift_hi = 1.; 808 } 809 810 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 811 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 812 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 813 ierr = PetscArrayzero(rtmp,n+1);CHKERRQ(ierr); 814 ics = ic; 815 816 #if defined(MV) 817 sctx.shift_top = 0.; 818 sctx.nshift_max = 0; 819 sctx.shift_lo = 0.; 820 sctx.shift_hi = 0.; 821 sctx.shift_fraction = 0.; 822 823 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 824 sctx.shift_top = 0.; 825 for (i=0; i<n; i++) { 826 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 827 d = (a->a)[diag[i]]; 828 rs = -PetscAbsScalar(d) - PetscRealPart(d); 829 v = a->a+ai[i]; 830 nz = ai[i+1] - ai[i]; 831 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 832 if (rs>sctx.shift_top) sctx.shift_top = rs; 833 } 834 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 835 sctx.shift_top *= 1.1; 836 sctx.nshift_max = 5; 837 sctx.shift_lo = 0.; 838 sctx.shift_hi = 1.; 839 } 840 841 sctx.shift_amount = 0.; 842 sctx.nshift = 0; 843 #endif 844 845 do { 846 sctx.newshift = PETSC_FALSE; 847 for (i=0; i<n; i++) { 848 /* load in initial unfactored row */ 849 nz = ai[r[i]+1] - ai[r[i]]; 850 ajtmp = aj + ai[r[i]]; 851 v = a->a + ai[r[i]]; 852 /* sort permuted ajtmp and values v accordingly */ 853 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 854 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 855 856 diag[r[i]] = ai[r[i]]; 857 for (j=0; j<nz; j++) { 858 rtmp[ajtmp[j]] = v[j]; 859 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 860 } 861 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 862 863 row = *ajtmp++; 864 while (row < i) { 865 pc = rtmp + row; 866 if (*pc != 0.0) { 867 pv = a->a + diag[r[row]]; 868 pj = aj + diag[r[row]] + 1; 869 870 multiplier = *pc / *pv++; 871 *pc = multiplier; 872 nz = ai[r[row]+1] - diag[r[row]] - 1; 873 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 874 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 875 } 876 row = *ajtmp++; 877 } 878 /* finished row so overwrite it onto a->a */ 879 pv = a->a + ai[r[i]]; 880 pj = aj + ai[r[i]]; 881 nz = ai[r[i]+1] - ai[r[i]]; 882 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 883 884 rs = 0.0; 885 for (j=0; j<nz; j++) { 886 pv[j] = rtmp[pj[j]]; 887 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 888 } 889 890 sctx.rs = rs; 891 sctx.pv = pv[nbdiag]; 892 ierr = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr); 893 if (sctx.newshift) break; 894 pv[nbdiag] = sctx.pv; 895 } 896 897 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 898 /* 899 * if no shift in this attempt & shifting & started shifting & can refine, 900 * then try lower shift 901 */ 902 sctx.shift_hi = sctx.shift_fraction; 903 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 904 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 905 sctx.newshift = PETSC_TRUE; 906 sctx.nshift++; 907 } 908 } while (sctx.newshift); 909 910 /* invert diagonal entries for simplier triangular solves */ 911 for (i=0; i<n; i++) { 912 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 913 } 914 915 ierr = PetscFree(rtmp);CHKERRQ(ierr); 916 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 917 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 918 919 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 920 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 921 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 922 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 923 924 A->assembled = PETSC_TRUE; 925 A->preallocated = PETSC_TRUE; 926 927 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 928 if (sctx.nshift) { 929 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 930 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 931 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 932 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 933 } 934 } 935 PetscFunctionReturn(0); 936 } 937 938 /* ----------------------------------------------------------- */ 939 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 940 { 941 PetscErrorCode ierr; 942 Mat C; 943 944 PetscFunctionBegin; 945 ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 946 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 947 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 948 949 A->ops->solve = C->ops->solve; 950 A->ops->solvetranspose = C->ops->solvetranspose; 951 952 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 953 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 954 PetscFunctionReturn(0); 955 } 956 /* ----------------------------------------------------------- */ 957 958 959 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 960 { 961 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 962 IS iscol = a->col,isrow = a->row; 963 PetscErrorCode ierr; 964 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 965 PetscInt nz; 966 const PetscInt *rout,*cout,*r,*c; 967 PetscScalar *x,*tmp,*tmps,sum; 968 const PetscScalar *b; 969 const MatScalar *aa = a->a,*v; 970 971 PetscFunctionBegin; 972 if (!n) PetscFunctionReturn(0); 973 974 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 975 ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr); 976 tmp = a->solve_work; 977 978 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 979 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 980 981 /* forward solve the lower triangular */ 982 tmp[0] = b[*r++]; 983 tmps = tmp; 984 for (i=1; i<n; i++) { 985 v = aa + ai[i]; 986 vi = aj + ai[i]; 987 nz = a->diag[i] - ai[i]; 988 sum = b[*r++]; 989 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 990 tmp[i] = sum; 991 } 992 993 /* backward solve the upper triangular */ 994 for (i=n-1; i>=0; i--) { 995 v = aa + a->diag[i] + 1; 996 vi = aj + a->diag[i] + 1; 997 nz = ai[i+1] - a->diag[i] - 1; 998 sum = tmp[i]; 999 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1000 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 1001 } 1002 1003 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1004 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1005 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1006 ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr); 1007 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1008 PetscFunctionReturn(0); 1009 } 1010 1011 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X) 1012 { 1013 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1014 IS iscol = a->col,isrow = a->row; 1015 PetscErrorCode ierr; 1016 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1017 PetscInt nz,neq; 1018 const PetscInt *rout,*cout,*r,*c; 1019 PetscScalar *x,*tmp,*tmps,sum; 1020 const PetscScalar *aa = a->a,*v; 1021 const PetscScalar *b; 1022 PetscBool bisdense,xisdense; 1023 1024 PetscFunctionBegin; 1025 if (!n) PetscFunctionReturn(0); 1026 1027 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1028 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1029 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1030 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1031 1032 ierr = MatDenseGetArrayRead(B,&b);CHKERRQ(ierr); 1033 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1034 1035 tmp = a->solve_work; 1036 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1037 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1038 1039 for (neq=0; neq<B->cmap->n; neq++) { 1040 /* forward solve the lower triangular */ 1041 tmp[0] = b[r[0]]; 1042 tmps = tmp; 1043 for (i=1; i<n; i++) { 1044 v = aa + ai[i]; 1045 vi = aj + ai[i]; 1046 nz = a->diag[i] - ai[i]; 1047 sum = b[r[i]]; 1048 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1049 tmp[i] = sum; 1050 } 1051 /* backward solve the upper triangular */ 1052 for (i=n-1; i>=0; i--) { 1053 v = aa + a->diag[i] + 1; 1054 vi = aj + a->diag[i] + 1; 1055 nz = ai[i+1] - a->diag[i] - 1; 1056 sum = tmp[i]; 1057 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1058 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 1059 } 1060 1061 b += n; 1062 x += n; 1063 } 1064 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1065 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1066 ierr = MatDenseRestoreArrayRead(B,&b);CHKERRQ(ierr); 1067 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1068 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1069 PetscFunctionReturn(0); 1070 } 1071 1072 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 1073 { 1074 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1075 IS iscol = a->col,isrow = a->row; 1076 PetscErrorCode ierr; 1077 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1078 PetscInt nz,neq; 1079 const PetscInt *rout,*cout,*r,*c; 1080 PetscScalar *x,*tmp,sum; 1081 const PetscScalar *b; 1082 const PetscScalar *aa = a->a,*v; 1083 PetscBool bisdense,xisdense; 1084 1085 PetscFunctionBegin; 1086 if (!n) PetscFunctionReturn(0); 1087 1088 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1089 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1090 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1091 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1092 1093 ierr = MatDenseGetArrayRead(B,&b);CHKERRQ(ierr); 1094 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1095 1096 tmp = a->solve_work; 1097 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1098 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1099 1100 for (neq=0; neq<B->cmap->n; neq++) { 1101 /* forward solve the lower triangular */ 1102 tmp[0] = b[r[0]]; 1103 v = aa; 1104 vi = aj; 1105 for (i=1; i<n; i++) { 1106 nz = ai[i+1] - ai[i]; 1107 sum = b[r[i]]; 1108 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1109 tmp[i] = sum; 1110 v += nz; vi += nz; 1111 } 1112 1113 /* backward solve the upper triangular */ 1114 for (i=n-1; i>=0; i--) { 1115 v = aa + adiag[i+1]+1; 1116 vi = aj + adiag[i+1]+1; 1117 nz = adiag[i]-adiag[i+1]-1; 1118 sum = tmp[i]; 1119 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1120 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 1121 } 1122 1123 b += n; 1124 x += n; 1125 } 1126 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1127 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1128 ierr = MatDenseRestoreArrayRead(B,&b);CHKERRQ(ierr); 1129 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1130 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1131 PetscFunctionReturn(0); 1132 } 1133 1134 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 1135 { 1136 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1137 IS iscol = a->col,isrow = a->row; 1138 PetscErrorCode ierr; 1139 const PetscInt *r,*c,*rout,*cout; 1140 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1141 PetscInt nz,row; 1142 PetscScalar *x,*tmp,*tmps,sum; 1143 const PetscScalar *b; 1144 const MatScalar *aa = a->a,*v; 1145 1146 PetscFunctionBegin; 1147 if (!n) PetscFunctionReturn(0); 1148 1149 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1150 ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr); 1151 tmp = a->solve_work; 1152 1153 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1154 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1155 1156 /* forward solve the lower triangular */ 1157 tmp[0] = b[*r++]; 1158 tmps = tmp; 1159 for (row=1; row<n; row++) { 1160 i = rout[row]; /* permuted row */ 1161 v = aa + ai[i]; 1162 vi = aj + ai[i]; 1163 nz = a->diag[i] - ai[i]; 1164 sum = b[*r++]; 1165 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1166 tmp[row] = sum; 1167 } 1168 1169 /* backward solve the upper triangular */ 1170 for (row=n-1; row>=0; row--) { 1171 i = rout[row]; /* permuted row */ 1172 v = aa + a->diag[i] + 1; 1173 vi = aj + a->diag[i] + 1; 1174 nz = ai[i+1] - a->diag[i] - 1; 1175 sum = tmp[row]; 1176 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1177 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 1178 } 1179 1180 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1181 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1182 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1183 ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr); 1184 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1185 PetscFunctionReturn(0); 1186 } 1187 1188 /* ----------------------------------------------------------- */ 1189 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1190 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1191 { 1192 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1193 PetscErrorCode ierr; 1194 PetscInt n = A->rmap->n; 1195 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag; 1196 PetscScalar *x; 1197 const PetscScalar *b; 1198 const MatScalar *aa = a->a; 1199 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1200 PetscInt adiag_i,i,nz,ai_i; 1201 const PetscInt *vi; 1202 const MatScalar *v; 1203 PetscScalar sum; 1204 #endif 1205 1206 PetscFunctionBegin; 1207 if (!n) PetscFunctionReturn(0); 1208 1209 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1210 ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr); 1211 1212 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1213 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 1214 #else 1215 /* forward solve the lower triangular */ 1216 x[0] = b[0]; 1217 for (i=1; i<n; i++) { 1218 ai_i = ai[i]; 1219 v = aa + ai_i; 1220 vi = aj + ai_i; 1221 nz = adiag[i] - ai_i; 1222 sum = b[i]; 1223 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1224 x[i] = sum; 1225 } 1226 1227 /* backward solve the upper triangular */ 1228 for (i=n-1; i>=0; i--) { 1229 adiag_i = adiag[i]; 1230 v = aa + adiag_i + 1; 1231 vi = aj + adiag_i + 1; 1232 nz = ai[i+1] - adiag_i - 1; 1233 sum = x[i]; 1234 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1235 x[i] = sum*aa[adiag_i]; 1236 } 1237 #endif 1238 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1239 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1240 ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr); 1241 PetscFunctionReturn(0); 1242 } 1243 1244 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx) 1245 { 1246 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1247 IS iscol = a->col,isrow = a->row; 1248 PetscErrorCode ierr; 1249 PetscInt i, n = A->rmap->n,j; 1250 PetscInt nz; 1251 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j; 1252 PetscScalar *x,*tmp,sum; 1253 const PetscScalar *b; 1254 const MatScalar *aa = a->a,*v; 1255 1256 PetscFunctionBegin; 1257 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1258 1259 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1260 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1261 tmp = a->solve_work; 1262 1263 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1264 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1265 1266 /* forward solve the lower triangular */ 1267 tmp[0] = b[*r++]; 1268 for (i=1; i<n; i++) { 1269 v = aa + ai[i]; 1270 vi = aj + ai[i]; 1271 nz = a->diag[i] - ai[i]; 1272 sum = b[*r++]; 1273 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1274 tmp[i] = sum; 1275 } 1276 1277 /* backward solve the upper triangular */ 1278 for (i=n-1; i>=0; i--) { 1279 v = aa + a->diag[i] + 1; 1280 vi = aj + a->diag[i] + 1; 1281 nz = ai[i+1] - a->diag[i] - 1; 1282 sum = tmp[i]; 1283 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1284 tmp[i] = sum*aa[a->diag[i]]; 1285 x[*c--] += tmp[i]; 1286 } 1287 1288 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1289 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1290 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1291 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1292 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1293 PetscFunctionReturn(0); 1294 } 1295 1296 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1297 { 1298 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1299 IS iscol = a->col,isrow = a->row; 1300 PetscErrorCode ierr; 1301 PetscInt i, n = A->rmap->n,j; 1302 PetscInt nz; 1303 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1304 PetscScalar *x,*tmp,sum; 1305 const PetscScalar *b; 1306 const MatScalar *aa = a->a,*v; 1307 1308 PetscFunctionBegin; 1309 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1310 1311 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1312 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1313 tmp = a->solve_work; 1314 1315 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1316 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1317 1318 /* forward solve the lower triangular */ 1319 tmp[0] = b[r[0]]; 1320 v = aa; 1321 vi = aj; 1322 for (i=1; i<n; i++) { 1323 nz = ai[i+1] - ai[i]; 1324 sum = b[r[i]]; 1325 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1326 tmp[i] = sum; 1327 v += nz; 1328 vi += nz; 1329 } 1330 1331 /* backward solve the upper triangular */ 1332 v = aa + adiag[n-1]; 1333 vi = aj + adiag[n-1]; 1334 for (i=n-1; i>=0; i--) { 1335 nz = adiag[i] - adiag[i+1] - 1; 1336 sum = tmp[i]; 1337 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1338 tmp[i] = sum*v[nz]; 1339 x[c[i]] += tmp[i]; 1340 v += nz+1; vi += nz+1; 1341 } 1342 1343 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1344 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1345 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1346 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1347 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1348 PetscFunctionReturn(0); 1349 } 1350 1351 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 1352 { 1353 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1354 IS iscol = a->col,isrow = a->row; 1355 PetscErrorCode ierr; 1356 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1357 PetscInt i,n = A->rmap->n,j; 1358 PetscInt nz; 1359 PetscScalar *x,*tmp,s1; 1360 const MatScalar *aa = a->a,*v; 1361 const PetscScalar *b; 1362 1363 PetscFunctionBegin; 1364 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1365 ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr); 1366 tmp = a->solve_work; 1367 1368 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1369 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1370 1371 /* copy the b into temp work space according to permutation */ 1372 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1373 1374 /* forward solve the U^T */ 1375 for (i=0; i<n; i++) { 1376 v = aa + diag[i]; 1377 vi = aj + diag[i] + 1; 1378 nz = ai[i+1] - diag[i] - 1; 1379 s1 = tmp[i]; 1380 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1381 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1382 tmp[i] = s1; 1383 } 1384 1385 /* backward solve the L^T */ 1386 for (i=n-1; i>=0; i--) { 1387 v = aa + diag[i] - 1; 1388 vi = aj + diag[i] - 1; 1389 nz = diag[i] - ai[i]; 1390 s1 = tmp[i]; 1391 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1392 } 1393 1394 /* copy tmp into x according to permutation */ 1395 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1396 1397 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1398 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1399 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1400 ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr); 1401 1402 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1403 PetscFunctionReturn(0); 1404 } 1405 1406 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1407 { 1408 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1409 IS iscol = a->col,isrow = a->row; 1410 PetscErrorCode ierr; 1411 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1412 PetscInt i,n = A->rmap->n,j; 1413 PetscInt nz; 1414 PetscScalar *x,*tmp,s1; 1415 const MatScalar *aa = a->a,*v; 1416 const PetscScalar *b; 1417 1418 PetscFunctionBegin; 1419 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1420 ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr); 1421 tmp = a->solve_work; 1422 1423 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1424 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1425 1426 /* copy the b into temp work space according to permutation */ 1427 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1428 1429 /* forward solve the U^T */ 1430 for (i=0; i<n; i++) { 1431 v = aa + adiag[i+1] + 1; 1432 vi = aj + adiag[i+1] + 1; 1433 nz = adiag[i] - adiag[i+1] - 1; 1434 s1 = tmp[i]; 1435 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1436 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1437 tmp[i] = s1; 1438 } 1439 1440 /* backward solve the L^T */ 1441 for (i=n-1; i>=0; i--) { 1442 v = aa + ai[i]; 1443 vi = aj + ai[i]; 1444 nz = ai[i+1] - ai[i]; 1445 s1 = tmp[i]; 1446 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1447 } 1448 1449 /* copy tmp into x according to permutation */ 1450 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1451 1452 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1453 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1454 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1455 ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr); 1456 1457 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1458 PetscFunctionReturn(0); 1459 } 1460 1461 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx) 1462 { 1463 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1464 IS iscol = a->col,isrow = a->row; 1465 PetscErrorCode ierr; 1466 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1467 PetscInt i,n = A->rmap->n,j; 1468 PetscInt nz; 1469 PetscScalar *x,*tmp,s1; 1470 const MatScalar *aa = a->a,*v; 1471 const PetscScalar *b; 1472 1473 PetscFunctionBegin; 1474 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1475 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1476 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1477 tmp = a->solve_work; 1478 1479 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1480 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1481 1482 /* copy the b into temp work space according to permutation */ 1483 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1484 1485 /* forward solve the U^T */ 1486 for (i=0; i<n; i++) { 1487 v = aa + diag[i]; 1488 vi = aj + diag[i] + 1; 1489 nz = ai[i+1] - diag[i] - 1; 1490 s1 = tmp[i]; 1491 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1492 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1493 tmp[i] = s1; 1494 } 1495 1496 /* backward solve the L^T */ 1497 for (i=n-1; i>=0; i--) { 1498 v = aa + diag[i] - 1; 1499 vi = aj + diag[i] - 1; 1500 nz = diag[i] - ai[i]; 1501 s1 = tmp[i]; 1502 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1503 } 1504 1505 /* copy tmp into x according to permutation */ 1506 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1507 1508 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1509 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1510 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1511 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1512 1513 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1514 PetscFunctionReturn(0); 1515 } 1516 1517 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1518 { 1519 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1520 IS iscol = a->col,isrow = a->row; 1521 PetscErrorCode ierr; 1522 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1523 PetscInt i,n = A->rmap->n,j; 1524 PetscInt nz; 1525 PetscScalar *x,*tmp,s1; 1526 const MatScalar *aa = a->a,*v; 1527 const PetscScalar *b; 1528 1529 PetscFunctionBegin; 1530 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1531 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1532 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1533 tmp = a->solve_work; 1534 1535 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1536 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1537 1538 /* copy the b into temp work space according to permutation */ 1539 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1540 1541 /* forward solve the U^T */ 1542 for (i=0; i<n; i++) { 1543 v = aa + adiag[i+1] + 1; 1544 vi = aj + adiag[i+1] + 1; 1545 nz = adiag[i] - adiag[i+1] - 1; 1546 s1 = tmp[i]; 1547 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1548 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1549 tmp[i] = s1; 1550 } 1551 1552 1553 /* backward solve the L^T */ 1554 for (i=n-1; i>=0; i--) { 1555 v = aa + ai[i]; 1556 vi = aj + ai[i]; 1557 nz = ai[i+1] - ai[i]; 1558 s1 = tmp[i]; 1559 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1560 } 1561 1562 /* copy tmp into x according to permutation */ 1563 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1564 1565 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1566 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1567 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1568 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1569 1570 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1571 PetscFunctionReturn(0); 1572 } 1573 1574 /* ----------------------------------------------------------------*/ 1575 1576 /* 1577 ilu() under revised new data structure. 1578 Factored arrays bj and ba are stored as 1579 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1580 1581 bi=fact->i is an array of size n+1, in which 1582 bi+ 1583 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1584 bi[n]: points to L(n-1,n-1)+1 1585 1586 bdiag=fact->diag is an array of size n+1,in which 1587 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1588 bdiag[n]: points to entry of U(n-1,0)-1 1589 1590 U(i,:) contains bdiag[i] as its last entry, i.e., 1591 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1592 */ 1593 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1594 { 1595 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1596 PetscErrorCode ierr; 1597 const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1598 PetscInt i,j,k=0,nz,*bi,*bj,*bdiag; 1599 IS isicol; 1600 1601 PetscFunctionBegin; 1602 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1603 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 1604 b = (Mat_SeqAIJ*)(fact)->data; 1605 1606 /* allocate matrix arrays for new data structure */ 1607 ierr = PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);CHKERRQ(ierr); 1608 ierr = PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1609 1610 b->singlemalloc = PETSC_TRUE; 1611 if (!b->diag) { 1612 ierr = PetscMalloc1(n+1,&b->diag);CHKERRQ(ierr); 1613 ierr = PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1614 } 1615 bdiag = b->diag; 1616 1617 if (n > 0) { 1618 ierr = PetscArrayzero(b->a,ai[n]);CHKERRQ(ierr); 1619 } 1620 1621 /* set bi and bj with new data structure */ 1622 bi = b->i; 1623 bj = b->j; 1624 1625 /* L part */ 1626 bi[0] = 0; 1627 for (i=0; i<n; i++) { 1628 nz = adiag[i] - ai[i]; 1629 bi[i+1] = bi[i] + nz; 1630 aj = a->j + ai[i]; 1631 for (j=0; j<nz; j++) { 1632 /* *bj = aj[j]; bj++; */ 1633 bj[k++] = aj[j]; 1634 } 1635 } 1636 1637 /* U part */ 1638 bdiag[n] = bi[n]-1; 1639 for (i=n-1; i>=0; i--) { 1640 nz = ai[i+1] - adiag[i] - 1; 1641 aj = a->j + adiag[i] + 1; 1642 for (j=0; j<nz; j++) { 1643 /* *bj = aj[j]; bj++; */ 1644 bj[k++] = aj[j]; 1645 } 1646 /* diag[i] */ 1647 /* *bj = i; bj++; */ 1648 bj[k++] = i; 1649 bdiag[i] = bdiag[i+1] + nz + 1; 1650 } 1651 1652 fact->factortype = MAT_FACTOR_ILU; 1653 fact->info.factor_mallocs = 0; 1654 fact->info.fill_ratio_given = info->fill; 1655 fact->info.fill_ratio_needed = 1.0; 1656 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1657 ierr = MatSeqAIJCheckInode_FactorLU(fact);CHKERRQ(ierr); 1658 1659 b = (Mat_SeqAIJ*)(fact)->data; 1660 b->row = isrow; 1661 b->col = iscol; 1662 b->icol = isicol; 1663 ierr = PetscMalloc1(fact->rmap->n+1,&b->solve_work);CHKERRQ(ierr); 1664 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1665 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1666 PetscFunctionReturn(0); 1667 } 1668 1669 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1670 { 1671 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1672 IS isicol; 1673 PetscErrorCode ierr; 1674 const PetscInt *r,*ic; 1675 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1676 PetscInt *bi,*cols,nnz,*cols_lvl; 1677 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1678 PetscInt i,levels,diagonal_fill; 1679 PetscBool col_identity,row_identity,missing; 1680 PetscReal f; 1681 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 1682 PetscBT lnkbt; 1683 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1684 PetscFreeSpaceList free_space =NULL,current_space=NULL; 1685 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 1686 1687 PetscFunctionBegin; 1688 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1689 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1690 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1691 1692 levels = (PetscInt)info->levels; 1693 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1694 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1695 if (!levels && row_identity && col_identity) { 1696 /* special case: ilu(0) with natural ordering */ 1697 ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1698 if (a->inode.size) { 1699 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1700 } 1701 PetscFunctionReturn(0); 1702 } 1703 1704 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1705 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1706 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1707 1708 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1709 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 1710 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 1711 bi[0] = bdiag[0] = 0; 1712 ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr); 1713 1714 /* create a linked list for storing column indices of the active row */ 1715 nlnk = n + 1; 1716 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1717 1718 /* initial FreeSpace size is f*(ai[n]+1) */ 1719 f = info->fill; 1720 diagonal_fill = (PetscInt)info->diagonal_fill; 1721 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); 1722 current_space = free_space; 1723 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);CHKERRQ(ierr); 1724 current_space_lvl = free_space_lvl; 1725 for (i=0; i<n; i++) { 1726 nzi = 0; 1727 /* copy current row into linked list */ 1728 nnz = ai[r[i]+1] - ai[r[i]]; 1729 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1730 cols = aj + ai[r[i]]; 1731 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1732 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1733 nzi += nlnk; 1734 1735 /* make sure diagonal entry is included */ 1736 if (diagonal_fill && lnk[i] == -1) { 1737 fm = n; 1738 while (lnk[fm] < i) fm = lnk[fm]; 1739 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1740 lnk[fm] = i; 1741 lnk_lvl[i] = 0; 1742 nzi++; dcount++; 1743 } 1744 1745 /* add pivot rows into the active row */ 1746 nzbd = 0; 1747 prow = lnk[n]; 1748 while (prow < i) { 1749 nnz = bdiag[prow]; 1750 cols = bj_ptr[prow] + nnz + 1; 1751 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1752 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1753 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1754 nzi += nlnk; 1755 prow = lnk[prow]; 1756 nzbd++; 1757 } 1758 bdiag[i] = nzbd; 1759 bi[i+1] = bi[i] + nzi; 1760 /* if free space is not available, make more free space */ 1761 if (current_space->local_remaining<nzi) { 1762 nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(nzi,n - i)); /* estimated and max additional space needed */ 1763 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1764 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1765 reallocs++; 1766 } 1767 1768 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1769 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1770 bj_ptr[i] = current_space->array; 1771 bjlvl_ptr[i] = current_space_lvl->array; 1772 1773 /* make sure the active row i has diagonal entry */ 1774 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1775 1776 current_space->array += nzi; 1777 current_space->local_used += nzi; 1778 current_space->local_remaining -= nzi; 1779 current_space_lvl->array += nzi; 1780 current_space_lvl->local_used += nzi; 1781 current_space_lvl->local_remaining -= nzi; 1782 } 1783 1784 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1785 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1786 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1787 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 1788 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 1789 1790 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1791 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1792 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 1793 1794 #if defined(PETSC_USE_INFO) 1795 { 1796 PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1797 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 1798 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 1799 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr); 1800 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1801 if (diagonal_fill) { 1802 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr); 1803 } 1804 } 1805 #endif 1806 /* put together the new matrix */ 1807 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 1808 ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr); 1809 b = (Mat_SeqAIJ*)(fact)->data; 1810 1811 b->free_a = PETSC_TRUE; 1812 b->free_ij = PETSC_TRUE; 1813 b->singlemalloc = PETSC_FALSE; 1814 1815 ierr = PetscMalloc1(bdiag[0]+1,&b->a);CHKERRQ(ierr); 1816 1817 b->j = bj; 1818 b->i = bi; 1819 b->diag = bdiag; 1820 b->ilen = 0; 1821 b->imax = 0; 1822 b->row = isrow; 1823 b->col = iscol; 1824 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1825 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1826 b->icol = isicol; 1827 1828 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 1829 /* In b structure: Free imax, ilen, old a, old j. 1830 Allocate bdiag, solve_work, new a, new j */ 1831 ierr = PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1832 b->maxnz = b->nz = bdiag[0]+1; 1833 1834 (fact)->info.factor_mallocs = reallocs; 1835 (fact)->info.fill_ratio_given = f; 1836 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1837 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1838 if (a->inode.size) { 1839 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1840 } 1841 ierr = MatSeqAIJCheckInode_FactorLU(fact);CHKERRQ(ierr); 1842 PetscFunctionReturn(0); 1843 } 1844 1845 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1846 { 1847 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1848 IS isicol; 1849 PetscErrorCode ierr; 1850 const PetscInt *r,*ic; 1851 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1852 PetscInt *bi,*cols,nnz,*cols_lvl; 1853 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1854 PetscInt i,levels,diagonal_fill; 1855 PetscBool col_identity,row_identity; 1856 PetscReal f; 1857 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 1858 PetscBT lnkbt; 1859 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1860 PetscFreeSpaceList free_space =NULL,current_space=NULL; 1861 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 1862 PetscBool missing; 1863 1864 PetscFunctionBegin; 1865 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1866 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1867 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1868 1869 f = info->fill; 1870 levels = (PetscInt)info->levels; 1871 diagonal_fill = (PetscInt)info->diagonal_fill; 1872 1873 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1874 1875 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1876 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1877 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1878 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 1879 1880 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1881 if (a->inode.size) { 1882 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1883 } 1884 fact->factortype = MAT_FACTOR_ILU; 1885 (fact)->info.factor_mallocs = 0; 1886 (fact)->info.fill_ratio_given = info->fill; 1887 (fact)->info.fill_ratio_needed = 1.0; 1888 1889 b = (Mat_SeqAIJ*)(fact)->data; 1890 b->row = isrow; 1891 b->col = iscol; 1892 b->icol = isicol; 1893 ierr = PetscMalloc1((fact)->rmap->n+1,&b->solve_work);CHKERRQ(ierr); 1894 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1895 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1896 PetscFunctionReturn(0); 1897 } 1898 1899 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1900 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1901 1902 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1903 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 1904 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 1905 bi[0] = bdiag[0] = 0; 1906 1907 ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr); 1908 1909 /* create a linked list for storing column indices of the active row */ 1910 nlnk = n + 1; 1911 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1912 1913 /* initial FreeSpace size is f*(ai[n]+1) */ 1914 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); 1915 current_space = free_space; 1916 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);CHKERRQ(ierr); 1917 current_space_lvl = free_space_lvl; 1918 1919 for (i=0; i<n; i++) { 1920 nzi = 0; 1921 /* copy current row into linked list */ 1922 nnz = ai[r[i]+1] - ai[r[i]]; 1923 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1924 cols = aj + ai[r[i]]; 1925 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1926 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1927 nzi += nlnk; 1928 1929 /* make sure diagonal entry is included */ 1930 if (diagonal_fill && lnk[i] == -1) { 1931 fm = n; 1932 while (lnk[fm] < i) fm = lnk[fm]; 1933 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1934 lnk[fm] = i; 1935 lnk_lvl[i] = 0; 1936 nzi++; dcount++; 1937 } 1938 1939 /* add pivot rows into the active row */ 1940 nzbd = 0; 1941 prow = lnk[n]; 1942 while (prow < i) { 1943 nnz = bdiag[prow]; 1944 cols = bj_ptr[prow] + nnz + 1; 1945 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1946 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1947 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1948 nzi += nlnk; 1949 prow = lnk[prow]; 1950 nzbd++; 1951 } 1952 bdiag[i] = nzbd; 1953 bi[i+1] = bi[i] + nzi; 1954 1955 /* if free space is not available, make more free space */ 1956 if (current_space->local_remaining<nzi) { 1957 nnz = PetscIntMultTruncate(nzi,n - i); /* estimated and max additional space needed */ 1958 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1959 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1960 reallocs++; 1961 } 1962 1963 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1964 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1965 bj_ptr[i] = current_space->array; 1966 bjlvl_ptr[i] = current_space_lvl->array; 1967 1968 /* make sure the active row i has diagonal entry */ 1969 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1970 1971 current_space->array += nzi; 1972 current_space->local_used += nzi; 1973 current_space->local_remaining -= nzi; 1974 current_space_lvl->array += nzi; 1975 current_space_lvl->local_used += nzi; 1976 current_space_lvl->local_remaining -= nzi; 1977 } 1978 1979 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1980 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1981 1982 /* destroy list of free space and other temporary arrays */ 1983 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 1984 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */ 1985 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1986 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1987 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 1988 1989 #if defined(PETSC_USE_INFO) 1990 { 1991 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1992 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 1993 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 1994 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr); 1995 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1996 if (diagonal_fill) { 1997 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr); 1998 } 1999 } 2000 #endif 2001 2002 /* put together the new matrix */ 2003 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 2004 ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr); 2005 b = (Mat_SeqAIJ*)(fact)->data; 2006 2007 b->free_a = PETSC_TRUE; 2008 b->free_ij = PETSC_TRUE; 2009 b->singlemalloc = PETSC_FALSE; 2010 2011 ierr = PetscMalloc1(bi[n],&b->a);CHKERRQ(ierr); 2012 b->j = bj; 2013 b->i = bi; 2014 for (i=0; i<n; i++) bdiag[i] += bi[i]; 2015 b->diag = bdiag; 2016 b->ilen = 0; 2017 b->imax = 0; 2018 b->row = isrow; 2019 b->col = iscol; 2020 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2021 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2022 b->icol = isicol; 2023 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 2024 /* In b structure: Free imax, ilen, old a, old j. 2025 Allocate bdiag, solve_work, new a, new j */ 2026 ierr = PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 2027 b->maxnz = b->nz = bi[n]; 2028 2029 (fact)->info.factor_mallocs = reallocs; 2030 (fact)->info.fill_ratio_given = f; 2031 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2032 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2033 if (a->inode.size) { 2034 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2035 } 2036 PetscFunctionReturn(0); 2037 } 2038 2039 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 2040 { 2041 Mat C = B; 2042 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2043 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2044 IS ip=b->row,iip = b->icol; 2045 PetscErrorCode ierr; 2046 const PetscInt *rip,*riip; 2047 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 2048 PetscInt *ai=a->i,*aj=a->j; 2049 PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 2050 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2051 PetscBool perm_identity; 2052 FactorShiftCtx sctx; 2053 PetscReal rs; 2054 MatScalar d,*v; 2055 2056 PetscFunctionBegin; 2057 /* MatPivotSetUp(): initialize shift context sctx */ 2058 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2059 2060 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2061 sctx.shift_top = info->zeropivot; 2062 for (i=0; i<mbs; i++) { 2063 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2064 d = (aa)[a->diag[i]]; 2065 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2066 v = aa+ai[i]; 2067 nz = ai[i+1] - ai[i]; 2068 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 2069 if (rs>sctx.shift_top) sctx.shift_top = rs; 2070 } 2071 sctx.shift_top *= 1.1; 2072 sctx.nshift_max = 5; 2073 sctx.shift_lo = 0.; 2074 sctx.shift_hi = 1.; 2075 } 2076 2077 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2078 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2079 2080 /* allocate working arrays 2081 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2082 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2083 */ 2084 ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);CHKERRQ(ierr); 2085 2086 do { 2087 sctx.newshift = PETSC_FALSE; 2088 2089 for (i=0; i<mbs; i++) c2r[i] = mbs; 2090 if (mbs) il[0] = 0; 2091 2092 for (k = 0; k<mbs; k++) { 2093 /* zero rtmp */ 2094 nz = bi[k+1] - bi[k]; 2095 bjtmp = bj + bi[k]; 2096 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2097 2098 /* load in initial unfactored row */ 2099 bval = ba + bi[k]; 2100 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2101 for (j = jmin; j < jmax; j++) { 2102 col = riip[aj[j]]; 2103 if (col >= k) { /* only take upper triangular entry */ 2104 rtmp[col] = aa[j]; 2105 *bval++ = 0.0; /* for in-place factorization */ 2106 } 2107 } 2108 /* shift the diagonal of the matrix: ZeropivotApply() */ 2109 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2110 2111 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2112 dk = rtmp[k]; 2113 i = c2r[k]; /* first row to be added to k_th row */ 2114 2115 while (i < k) { 2116 nexti = c2r[i]; /* next row to be added to k_th row */ 2117 2118 /* compute multiplier, update diag(k) and U(i,k) */ 2119 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2120 uikdi = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 2121 dk += uikdi*ba[ili]; /* update diag[k] */ 2122 ba[ili] = uikdi; /* -U(i,k) */ 2123 2124 /* add multiple of row i to k-th row */ 2125 jmin = ili + 1; jmax = bi[i+1]; 2126 if (jmin < jmax) { 2127 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2128 /* update il and c2r for row i */ 2129 il[i] = jmin; 2130 j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 2131 } 2132 i = nexti; 2133 } 2134 2135 /* copy data into U(k,:) */ 2136 rs = 0.0; 2137 jmin = bi[k]; jmax = bi[k+1]-1; 2138 if (jmin < jmax) { 2139 for (j=jmin; j<jmax; j++) { 2140 col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]); 2141 } 2142 /* add the k-th row into il and c2r */ 2143 il[k] = jmin; 2144 i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 2145 } 2146 2147 /* MatPivotCheck() */ 2148 sctx.rs = rs; 2149 sctx.pv = dk; 2150 ierr = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr); 2151 if (sctx.newshift) break; 2152 dk = sctx.pv; 2153 2154 ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 2155 } 2156 } while (sctx.newshift); 2157 2158 ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr); 2159 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2160 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2161 2162 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2163 if (perm_identity) { 2164 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2165 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2166 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2167 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2168 } else { 2169 B->ops->solve = MatSolve_SeqSBAIJ_1; 2170 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2171 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2172 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2173 } 2174 2175 C->assembled = PETSC_TRUE; 2176 C->preallocated = PETSC_TRUE; 2177 2178 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2179 2180 /* MatPivotView() */ 2181 if (sctx.nshift) { 2182 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2183 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 2184 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2185 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2186 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 2187 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr); 2188 } 2189 } 2190 PetscFunctionReturn(0); 2191 } 2192 2193 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 2194 { 2195 Mat C = B; 2196 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2197 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2198 IS ip=b->row,iip = b->icol; 2199 PetscErrorCode ierr; 2200 const PetscInt *rip,*riip; 2201 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp; 2202 PetscInt *ai=a->i,*aj=a->j; 2203 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 2204 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2205 PetscBool perm_identity; 2206 FactorShiftCtx sctx; 2207 PetscReal rs; 2208 MatScalar d,*v; 2209 2210 PetscFunctionBegin; 2211 /* MatPivotSetUp(): initialize shift context sctx */ 2212 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2213 2214 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2215 sctx.shift_top = info->zeropivot; 2216 for (i=0; i<mbs; i++) { 2217 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2218 d = (aa)[a->diag[i]]; 2219 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2220 v = aa+ai[i]; 2221 nz = ai[i+1] - ai[i]; 2222 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 2223 if (rs>sctx.shift_top) sctx.shift_top = rs; 2224 } 2225 sctx.shift_top *= 1.1; 2226 sctx.nshift_max = 5; 2227 sctx.shift_lo = 0.; 2228 sctx.shift_hi = 1.; 2229 } 2230 2231 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2232 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2233 2234 /* initialization */ 2235 ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);CHKERRQ(ierr); 2236 2237 do { 2238 sctx.newshift = PETSC_FALSE; 2239 2240 for (i=0; i<mbs; i++) jl[i] = mbs; 2241 il[0] = 0; 2242 2243 for (k = 0; k<mbs; k++) { 2244 /* zero rtmp */ 2245 nz = bi[k+1] - bi[k]; 2246 bjtmp = bj + bi[k]; 2247 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2248 2249 bval = ba + bi[k]; 2250 /* initialize k-th row by the perm[k]-th row of A */ 2251 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2252 for (j = jmin; j < jmax; j++) { 2253 col = riip[aj[j]]; 2254 if (col >= k) { /* only take upper triangular entry */ 2255 rtmp[col] = aa[j]; 2256 *bval++ = 0.0; /* for in-place factorization */ 2257 } 2258 } 2259 /* shift the diagonal of the matrix */ 2260 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2261 2262 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2263 dk = rtmp[k]; 2264 i = jl[k]; /* first row to be added to k_th row */ 2265 2266 while (i < k) { 2267 nexti = jl[i]; /* next row to be added to k_th row */ 2268 2269 /* compute multiplier, update diag(k) and U(i,k) */ 2270 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2271 uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */ 2272 dk += uikdi*ba[ili]; 2273 ba[ili] = uikdi; /* -U(i,k) */ 2274 2275 /* add multiple of row i to k-th row */ 2276 jmin = ili + 1; jmax = bi[i+1]; 2277 if (jmin < jmax) { 2278 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2279 /* update il and jl for row i */ 2280 il[i] = jmin; 2281 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 2282 } 2283 i = nexti; 2284 } 2285 2286 /* shift the diagonals when zero pivot is detected */ 2287 /* compute rs=sum of abs(off-diagonal) */ 2288 rs = 0.0; 2289 jmin = bi[k]+1; 2290 nz = bi[k+1] - jmin; 2291 bcol = bj + jmin; 2292 for (j=0; j<nz; j++) { 2293 rs += PetscAbsScalar(rtmp[bcol[j]]); 2294 } 2295 2296 sctx.rs = rs; 2297 sctx.pv = dk; 2298 ierr = MatPivotCheck(B,A,info,&sctx,k);CHKERRQ(ierr); 2299 if (sctx.newshift) break; 2300 dk = sctx.pv; 2301 2302 /* copy data into U(k,:) */ 2303 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 2304 jmin = bi[k]+1; jmax = bi[k+1]; 2305 if (jmin < jmax) { 2306 for (j=jmin; j<jmax; j++) { 2307 col = bj[j]; ba[j] = rtmp[col]; 2308 } 2309 /* add the k-th row into il and jl */ 2310 il[k] = jmin; 2311 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 2312 } 2313 } 2314 } while (sctx.newshift); 2315 2316 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 2317 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2318 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2319 2320 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2321 if (perm_identity) { 2322 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2323 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2324 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2325 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2326 } else { 2327 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2328 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2329 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2330 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2331 } 2332 2333 C->assembled = PETSC_TRUE; 2334 C->preallocated = PETSC_TRUE; 2335 2336 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2337 if (sctx.nshift) { 2338 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2339 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2340 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2341 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2342 } 2343 } 2344 PetscFunctionReturn(0); 2345 } 2346 2347 /* 2348 icc() under revised new data structure. 2349 Factored arrays bj and ba are stored as 2350 U(0,:),...,U(i,:),U(n-1,:) 2351 2352 ui=fact->i is an array of size n+1, in which 2353 ui+ 2354 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2355 ui[n]: points to U(n-1,n-1)+1 2356 2357 udiag=fact->diag is an array of size n,in which 2358 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2359 2360 U(i,:) contains udiag[i] as its last entry, i.e., 2361 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2362 */ 2363 2364 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2365 { 2366 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2367 Mat_SeqSBAIJ *b; 2368 PetscErrorCode ierr; 2369 PetscBool perm_identity,missing; 2370 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2371 const PetscInt *rip,*riip; 2372 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2373 PetscInt nlnk,*lnk,*lnk_lvl=NULL,d; 2374 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2375 PetscReal fill =info->fill,levels=info->levels; 2376 PetscFreeSpaceList free_space =NULL,current_space=NULL; 2377 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 2378 PetscBT lnkbt; 2379 IS iperm; 2380 2381 PetscFunctionBegin; 2382 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2383 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2384 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2385 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2386 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2387 2388 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 2389 ierr = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr); 2390 ui[0] = 0; 2391 2392 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2393 if (!levels && perm_identity) { 2394 for (i=0; i<am; i++) { 2395 ncols = ai[i+1] - a->diag[i]; 2396 ui[i+1] = ui[i] + ncols; 2397 udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 2398 } 2399 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2400 cols = uj; 2401 for (i=0; i<am; i++) { 2402 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2403 ncols = ai[i+1] - a->diag[i] -1; 2404 for (j=0; j<ncols; j++) *cols++ = aj[j]; 2405 *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */ 2406 } 2407 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2408 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2409 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2410 2411 /* initialization */ 2412 ierr = PetscMalloc1(am+1,&ajtmp);CHKERRQ(ierr); 2413 2414 /* jl: linked list for storing indices of the pivot rows 2415 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2416 ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr); 2417 for (i=0; i<am; i++) { 2418 jl[i] = am; il[i] = 0; 2419 } 2420 2421 /* create and initialize a linked list for storing column indices of the active row k */ 2422 nlnk = am + 1; 2423 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2424 2425 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2426 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2427 current_space = free_space; 2428 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr); 2429 current_space_lvl = free_space_lvl; 2430 2431 for (k=0; k<am; k++) { /* for each active row k */ 2432 /* initialize lnk by the column indices of row rip[k] of A */ 2433 nzk = 0; 2434 ncols = ai[rip[k]+1] - ai[rip[k]]; 2435 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2436 ncols_upper = 0; 2437 for (j=0; j<ncols; j++) { 2438 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2439 if (riip[i] >= k) { /* only take upper triangular entry */ 2440 ajtmp[ncols_upper] = i; 2441 ncols_upper++; 2442 } 2443 } 2444 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2445 nzk += nlnk; 2446 2447 /* update lnk by computing fill-in for each pivot row to be merged in */ 2448 prow = jl[k]; /* 1st pivot row */ 2449 2450 while (prow < k) { 2451 nextprow = jl[prow]; 2452 2453 /* merge prow into k-th row */ 2454 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2455 jmax = ui[prow+1]; 2456 ncols = jmax-jmin; 2457 i = jmin - ui[prow]; 2458 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2459 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2460 j = *(uj - 1); 2461 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2462 nzk += nlnk; 2463 2464 /* update il and jl for prow */ 2465 if (jmin < jmax) { 2466 il[prow] = jmin; 2467 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2468 } 2469 prow = nextprow; 2470 } 2471 2472 /* if free space is not available, make more free space */ 2473 if (current_space->local_remaining<nzk) { 2474 i = am - k + 1; /* num of unfactored rows */ 2475 i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2476 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2477 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2478 reallocs++; 2479 } 2480 2481 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2482 if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2483 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2484 2485 /* add the k-th row into il and jl */ 2486 if (nzk > 1) { 2487 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2488 jl[k] = jl[i]; jl[i] = k; 2489 il[k] = ui[k] + 1; 2490 } 2491 uj_ptr[k] = current_space->array; 2492 uj_lvl_ptr[k] = current_space_lvl->array; 2493 2494 current_space->array += nzk; 2495 current_space->local_used += nzk; 2496 current_space->local_remaining -= nzk; 2497 2498 current_space_lvl->array += nzk; 2499 current_space_lvl->local_used += nzk; 2500 current_space_lvl->local_remaining -= nzk; 2501 2502 ui[k+1] = ui[k] + nzk; 2503 } 2504 2505 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2506 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2507 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2508 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2509 2510 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2511 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2512 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2513 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2514 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2515 2516 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2517 2518 /* put together the new matrix in MATSEQSBAIJ format */ 2519 b = (Mat_SeqSBAIJ*)(fact)->data; 2520 b->singlemalloc = PETSC_FALSE; 2521 2522 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 2523 2524 b->j = uj; 2525 b->i = ui; 2526 b->diag = udiag; 2527 b->free_diag = PETSC_TRUE; 2528 b->ilen = 0; 2529 b->imax = 0; 2530 b->row = perm; 2531 b->col = perm; 2532 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2533 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2534 b->icol = iperm; 2535 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2536 2537 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 2538 ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2539 2540 b->maxnz = b->nz = ui[am]; 2541 b->free_a = PETSC_TRUE; 2542 b->free_ij = PETSC_TRUE; 2543 2544 fact->info.factor_mallocs = reallocs; 2545 fact->info.fill_ratio_given = fill; 2546 if (ai[am] != 0) { 2547 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2548 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2549 } else { 2550 fact->info.fill_ratio_needed = 0.0; 2551 } 2552 #if defined(PETSC_USE_INFO) 2553 if (ai[am] != 0) { 2554 PetscReal af = fact->info.fill_ratio_needed; 2555 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2556 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2557 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2558 } else { 2559 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 2560 } 2561 #endif 2562 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2563 PetscFunctionReturn(0); 2564 } 2565 2566 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2567 { 2568 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2569 Mat_SeqSBAIJ *b; 2570 PetscErrorCode ierr; 2571 PetscBool perm_identity,missing; 2572 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2573 const PetscInt *rip,*riip; 2574 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2575 PetscInt nlnk,*lnk,*lnk_lvl=NULL,d; 2576 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2577 PetscReal fill =info->fill,levels=info->levels; 2578 PetscFreeSpaceList free_space =NULL,current_space=NULL; 2579 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 2580 PetscBT lnkbt; 2581 IS iperm; 2582 2583 PetscFunctionBegin; 2584 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2585 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2586 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2587 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2588 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2589 2590 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 2591 ierr = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr); 2592 ui[0] = 0; 2593 2594 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2595 if (!levels && perm_identity) { 2596 2597 for (i=0; i<am; i++) { 2598 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 2599 udiag[i] = ui[i]; 2600 } 2601 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2602 cols = uj; 2603 for (i=0; i<am; i++) { 2604 aj = a->j + a->diag[i]; 2605 ncols = ui[i+1] - ui[i]; 2606 for (j=0; j<ncols; j++) *cols++ = *aj++; 2607 } 2608 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2609 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2610 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2611 2612 /* initialization */ 2613 ierr = PetscMalloc1(am+1,&ajtmp);CHKERRQ(ierr); 2614 2615 /* jl: linked list for storing indices of the pivot rows 2616 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2617 ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr); 2618 for (i=0; i<am; i++) { 2619 jl[i] = am; il[i] = 0; 2620 } 2621 2622 /* create and initialize a linked list for storing column indices of the active row k */ 2623 nlnk = am + 1; 2624 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2625 2626 /* initial FreeSpace size is fill*(ai[am]+1) */ 2627 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);CHKERRQ(ierr); 2628 current_space = free_space; 2629 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space_lvl);CHKERRQ(ierr); 2630 current_space_lvl = free_space_lvl; 2631 2632 for (k=0; k<am; k++) { /* for each active row k */ 2633 /* initialize lnk by the column indices of row rip[k] of A */ 2634 nzk = 0; 2635 ncols = ai[rip[k]+1] - ai[rip[k]]; 2636 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2637 ncols_upper = 0; 2638 for (j=0; j<ncols; j++) { 2639 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2640 if (riip[i] >= k) { /* only take upper triangular entry */ 2641 ajtmp[ncols_upper] = i; 2642 ncols_upper++; 2643 } 2644 } 2645 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2646 nzk += nlnk; 2647 2648 /* update lnk by computing fill-in for each pivot row to be merged in */ 2649 prow = jl[k]; /* 1st pivot row */ 2650 2651 while (prow < k) { 2652 nextprow = jl[prow]; 2653 2654 /* merge prow into k-th row */ 2655 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2656 jmax = ui[prow+1]; 2657 ncols = jmax-jmin; 2658 i = jmin - ui[prow]; 2659 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2660 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2661 j = *(uj - 1); 2662 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2663 nzk += nlnk; 2664 2665 /* update il and jl for prow */ 2666 if (jmin < jmax) { 2667 il[prow] = jmin; 2668 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2669 } 2670 prow = nextprow; 2671 } 2672 2673 /* if free space is not available, make more free space */ 2674 if (current_space->local_remaining<nzk) { 2675 i = am - k + 1; /* num of unfactored rows */ 2676 i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2677 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2678 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2679 reallocs++; 2680 } 2681 2682 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2683 if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2684 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2685 2686 /* add the k-th row into il and jl */ 2687 if (nzk > 1) { 2688 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2689 jl[k] = jl[i]; jl[i] = k; 2690 il[k] = ui[k] + 1; 2691 } 2692 uj_ptr[k] = current_space->array; 2693 uj_lvl_ptr[k] = current_space_lvl->array; 2694 2695 current_space->array += nzk; 2696 current_space->local_used += nzk; 2697 current_space->local_remaining -= nzk; 2698 2699 current_space_lvl->array += nzk; 2700 current_space_lvl->local_used += nzk; 2701 current_space_lvl->local_remaining -= nzk; 2702 2703 ui[k+1] = ui[k] + nzk; 2704 } 2705 2706 #if defined(PETSC_USE_INFO) 2707 if (ai[am] != 0) { 2708 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2709 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2710 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2711 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2712 } else { 2713 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 2714 } 2715 #endif 2716 2717 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2718 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2719 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2720 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2721 2722 /* destroy list of free space and other temporary array(s) */ 2723 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2724 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2725 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2726 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2727 2728 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2729 2730 /* put together the new matrix in MATSEQSBAIJ format */ 2731 2732 b = (Mat_SeqSBAIJ*)fact->data; 2733 b->singlemalloc = PETSC_FALSE; 2734 2735 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 2736 2737 b->j = uj; 2738 b->i = ui; 2739 b->diag = udiag; 2740 b->free_diag = PETSC_TRUE; 2741 b->ilen = 0; 2742 b->imax = 0; 2743 b->row = perm; 2744 b->col = perm; 2745 2746 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2747 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2748 2749 b->icol = iperm; 2750 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2751 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 2752 ierr = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2753 b->maxnz = b->nz = ui[am]; 2754 b->free_a = PETSC_TRUE; 2755 b->free_ij = PETSC_TRUE; 2756 2757 fact->info.factor_mallocs = reallocs; 2758 fact->info.fill_ratio_given = fill; 2759 if (ai[am] != 0) { 2760 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2761 } else { 2762 fact->info.fill_ratio_needed = 0.0; 2763 } 2764 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2765 PetscFunctionReturn(0); 2766 } 2767 2768 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2769 { 2770 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2771 Mat_SeqSBAIJ *b; 2772 PetscErrorCode ierr; 2773 PetscBool perm_identity,missing; 2774 PetscReal fill = info->fill; 2775 const PetscInt *rip,*riip; 2776 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2777 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2778 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag; 2779 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2780 PetscBT lnkbt; 2781 IS iperm; 2782 2783 PetscFunctionBegin; 2784 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2785 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 2786 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 2787 2788 /* check whether perm is the identity mapping */ 2789 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2790 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2791 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2792 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2793 2794 /* initialization */ 2795 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 2796 ierr = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr); 2797 ui[0] = 0; 2798 2799 /* jl: linked list for storing indices of the pivot rows 2800 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2801 ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr); 2802 for (i=0; i<am; i++) { 2803 jl[i] = am; il[i] = 0; 2804 } 2805 2806 /* create and initialize a linked list for storing column indices of the active row k */ 2807 nlnk = am + 1; 2808 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2809 2810 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2811 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2812 current_space = free_space; 2813 2814 for (k=0; k<am; k++) { /* for each active row k */ 2815 /* initialize lnk by the column indices of row rip[k] of A */ 2816 nzk = 0; 2817 ncols = ai[rip[k]+1] - ai[rip[k]]; 2818 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2819 ncols_upper = 0; 2820 for (j=0; j<ncols; j++) { 2821 i = riip[*(aj + ai[rip[k]] + j)]; 2822 if (i >= k) { /* only take upper triangular entry */ 2823 cols[ncols_upper] = i; 2824 ncols_upper++; 2825 } 2826 } 2827 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2828 nzk += nlnk; 2829 2830 /* update lnk by computing fill-in for each pivot row to be merged in */ 2831 prow = jl[k]; /* 1st pivot row */ 2832 2833 while (prow < k) { 2834 nextprow = jl[prow]; 2835 /* merge prow into k-th row */ 2836 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2837 jmax = ui[prow+1]; 2838 ncols = jmax-jmin; 2839 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2840 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2841 nzk += nlnk; 2842 2843 /* update il and jl for prow */ 2844 if (jmin < jmax) { 2845 il[prow] = jmin; 2846 j = *uj_ptr; 2847 jl[prow] = jl[j]; 2848 jl[j] = prow; 2849 } 2850 prow = nextprow; 2851 } 2852 2853 /* if free space is not available, make more free space */ 2854 if (current_space->local_remaining<nzk) { 2855 i = am - k + 1; /* num of unfactored rows */ 2856 i = PetscIntMultTruncate(i,PetscMin(nzk,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2857 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2858 reallocs++; 2859 } 2860 2861 /* copy data into free space, then initialize lnk */ 2862 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 2863 2864 /* add the k-th row into il and jl */ 2865 if (nzk > 1) { 2866 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2867 jl[k] = jl[i]; jl[i] = k; 2868 il[k] = ui[k] + 1; 2869 } 2870 ui_ptr[k] = current_space->array; 2871 2872 current_space->array += nzk; 2873 current_space->local_used += nzk; 2874 current_space->local_remaining -= nzk; 2875 2876 ui[k+1] = ui[k] + nzk; 2877 } 2878 2879 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2880 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2881 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 2882 2883 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2884 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2885 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2886 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2887 2888 /* put together the new matrix in MATSEQSBAIJ format */ 2889 2890 b = (Mat_SeqSBAIJ*)fact->data; 2891 b->singlemalloc = PETSC_FALSE; 2892 b->free_a = PETSC_TRUE; 2893 b->free_ij = PETSC_TRUE; 2894 2895 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 2896 2897 b->j = uj; 2898 b->i = ui; 2899 b->diag = udiag; 2900 b->free_diag = PETSC_TRUE; 2901 b->ilen = 0; 2902 b->imax = 0; 2903 b->row = perm; 2904 b->col = perm; 2905 2906 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2907 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2908 2909 b->icol = iperm; 2910 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2911 2912 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 2913 ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2914 2915 b->maxnz = b->nz = ui[am]; 2916 2917 fact->info.factor_mallocs = reallocs; 2918 fact->info.fill_ratio_given = fill; 2919 if (ai[am] != 0) { 2920 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2921 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2922 } else { 2923 fact->info.fill_ratio_needed = 0.0; 2924 } 2925 #if defined(PETSC_USE_INFO) 2926 if (ai[am] != 0) { 2927 PetscReal af = fact->info.fill_ratio_needed; 2928 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2929 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2930 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2931 } else { 2932 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 2933 } 2934 #endif 2935 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2936 PetscFunctionReturn(0); 2937 } 2938 2939 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2940 { 2941 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2942 Mat_SeqSBAIJ *b; 2943 PetscErrorCode ierr; 2944 PetscBool perm_identity,missing; 2945 PetscReal fill = info->fill; 2946 const PetscInt *rip,*riip; 2947 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2948 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2949 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 2950 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2951 PetscBT lnkbt; 2952 IS iperm; 2953 2954 PetscFunctionBegin; 2955 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2956 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 2957 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 2958 2959 /* check whether perm is the identity mapping */ 2960 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2961 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2962 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2963 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2964 2965 /* initialization */ 2966 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 2967 ui[0] = 0; 2968 2969 /* jl: linked list for storing indices of the pivot rows 2970 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2971 ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr); 2972 for (i=0; i<am; i++) { 2973 jl[i] = am; il[i] = 0; 2974 } 2975 2976 /* create and initialize a linked list for storing column indices of the active row k */ 2977 nlnk = am + 1; 2978 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2979 2980 /* initial FreeSpace size is fill*(ai[am]+1) */ 2981 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);CHKERRQ(ierr); 2982 current_space = free_space; 2983 2984 for (k=0; k<am; k++) { /* for each active row k */ 2985 /* initialize lnk by the column indices of row rip[k] of A */ 2986 nzk = 0; 2987 ncols = ai[rip[k]+1] - ai[rip[k]]; 2988 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2989 ncols_upper = 0; 2990 for (j=0; j<ncols; j++) { 2991 i = riip[*(aj + ai[rip[k]] + j)]; 2992 if (i >= k) { /* only take upper triangular entry */ 2993 cols[ncols_upper] = i; 2994 ncols_upper++; 2995 } 2996 } 2997 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2998 nzk += nlnk; 2999 3000 /* update lnk by computing fill-in for each pivot row to be merged in */ 3001 prow = jl[k]; /* 1st pivot row */ 3002 3003 while (prow < k) { 3004 nextprow = jl[prow]; 3005 /* merge prow into k-th row */ 3006 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3007 jmax = ui[prow+1]; 3008 ncols = jmax-jmin; 3009 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3010 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3011 nzk += nlnk; 3012 3013 /* update il and jl for prow */ 3014 if (jmin < jmax) { 3015 il[prow] = jmin; 3016 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 3017 } 3018 prow = nextprow; 3019 } 3020 3021 /* if free space is not available, make more free space */ 3022 if (current_space->local_remaining<nzk) { 3023 i = am - k + 1; /* num of unfactored rows */ 3024 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3025 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 3026 reallocs++; 3027 } 3028 3029 /* copy data into free space, then initialize lnk */ 3030 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3031 3032 /* add the k-th row into il and jl */ 3033 if (nzk-1 > 0) { 3034 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3035 jl[k] = jl[i]; jl[i] = k; 3036 il[k] = ui[k] + 1; 3037 } 3038 ui_ptr[k] = current_space->array; 3039 3040 current_space->array += nzk; 3041 current_space->local_used += nzk; 3042 current_space->local_remaining -= nzk; 3043 3044 ui[k+1] = ui[k] + nzk; 3045 } 3046 3047 #if defined(PETSC_USE_INFO) 3048 if (ai[am] != 0) { 3049 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 3050 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 3051 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 3052 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 3053 } else { 3054 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 3055 } 3056 #endif 3057 3058 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 3059 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 3060 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 3061 3062 /* destroy list of free space and other temporary array(s) */ 3063 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 3064 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 3065 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3066 3067 /* put together the new matrix in MATSEQSBAIJ format */ 3068 3069 b = (Mat_SeqSBAIJ*)fact->data; 3070 b->singlemalloc = PETSC_FALSE; 3071 b->free_a = PETSC_TRUE; 3072 b->free_ij = PETSC_TRUE; 3073 3074 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 3075 3076 b->j = uj; 3077 b->i = ui; 3078 b->diag = 0; 3079 b->ilen = 0; 3080 b->imax = 0; 3081 b->row = perm; 3082 b->col = perm; 3083 3084 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3085 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3086 3087 b->icol = iperm; 3088 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3089 3090 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 3091 ierr = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3092 b->maxnz = b->nz = ui[am]; 3093 3094 fact->info.factor_mallocs = reallocs; 3095 fact->info.fill_ratio_given = fill; 3096 if (ai[am] != 0) { 3097 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 3098 } else { 3099 fact->info.fill_ratio_needed = 0.0; 3100 } 3101 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3102 PetscFunctionReturn(0); 3103 } 3104 3105 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 3106 { 3107 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3108 PetscErrorCode ierr; 3109 PetscInt n = A->rmap->n; 3110 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 3111 PetscScalar *x,sum; 3112 const PetscScalar *b; 3113 const MatScalar *aa = a->a,*v; 3114 PetscInt i,nz; 3115 3116 PetscFunctionBegin; 3117 if (!n) PetscFunctionReturn(0); 3118 3119 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3120 ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr); 3121 3122 /* forward solve the lower triangular */ 3123 x[0] = b[0]; 3124 v = aa; 3125 vi = aj; 3126 for (i=1; i<n; i++) { 3127 nz = ai[i+1] - ai[i]; 3128 sum = b[i]; 3129 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3130 v += nz; 3131 vi += nz; 3132 x[i] = sum; 3133 } 3134 3135 /* backward solve the upper triangular */ 3136 for (i=n-1; i>=0; i--) { 3137 v = aa + adiag[i+1] + 1; 3138 vi = aj + adiag[i+1] + 1; 3139 nz = adiag[i] - adiag[i+1]-1; 3140 sum = x[i]; 3141 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3142 x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3143 } 3144 3145 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 3146 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3147 ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr); 3148 PetscFunctionReturn(0); 3149 } 3150 3151 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 3152 { 3153 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3154 IS iscol = a->col,isrow = a->row; 3155 PetscErrorCode ierr; 3156 PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz; 3157 const PetscInt *rout,*cout,*r,*c; 3158 PetscScalar *x,*tmp,sum; 3159 const PetscScalar *b; 3160 const MatScalar *aa = a->a,*v; 3161 3162 PetscFunctionBegin; 3163 if (!n) PetscFunctionReturn(0); 3164 3165 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3166 ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr); 3167 tmp = a->solve_work; 3168 3169 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 3170 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 3171 3172 /* forward solve the lower triangular */ 3173 tmp[0] = b[r[0]]; 3174 v = aa; 3175 vi = aj; 3176 for (i=1; i<n; i++) { 3177 nz = ai[i+1] - ai[i]; 3178 sum = b[r[i]]; 3179 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3180 tmp[i] = sum; 3181 v += nz; vi += nz; 3182 } 3183 3184 /* backward solve the upper triangular */ 3185 for (i=n-1; i>=0; i--) { 3186 v = aa + adiag[i+1]+1; 3187 vi = aj + adiag[i+1]+1; 3188 nz = adiag[i]-adiag[i+1]-1; 3189 sum = tmp[i]; 3190 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3191 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 3192 } 3193 3194 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 3195 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 3196 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3197 ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr); 3198 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 3199 PetscFunctionReturn(0); 3200 } 3201 3202 /* 3203 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3204 */ 3205 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 3206 { 3207 Mat B = *fact; 3208 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 3209 IS isicol; 3210 PetscErrorCode ierr; 3211 const PetscInt *r,*ic; 3212 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 3213 PetscInt *bi,*bj,*bdiag,*bdiag_rev; 3214 PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au; 3215 PetscInt nlnk,*lnk; 3216 PetscBT lnkbt; 3217 PetscBool row_identity,icol_identity; 3218 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp; 3219 const PetscInt *ics; 3220 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 3221 PetscReal dt =info->dt,shift=info->shiftamount; 3222 PetscInt dtcount=(PetscInt)info->dtcount,nnz_max; 3223 PetscBool missing; 3224 3225 PetscFunctionBegin; 3226 if (dt == PETSC_DEFAULT) dt = 0.005; 3227 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); 3228 3229 /* ------- symbolic factorization, can be reused ---------*/ 3230 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 3231 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 3232 adiag=a->diag; 3233 3234 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 3235 3236 /* bdiag is location of diagonal in factor */ 3237 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); /* becomes b->diag */ 3238 ierr = PetscMalloc1(n+1,&bdiag_rev);CHKERRQ(ierr); /* temporary */ 3239 3240 /* allocate row pointers bi */ 3241 ierr = PetscMalloc1(2*n+2,&bi);CHKERRQ(ierr); 3242 3243 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3244 if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */ 3245 nnz_max = ai[n]+2*n*dtcount+2; 3246 3247 ierr = PetscMalloc1(nnz_max+1,&bj);CHKERRQ(ierr); 3248 ierr = PetscMalloc1(nnz_max+1,&ba);CHKERRQ(ierr); 3249 3250 /* put together the new matrix */ 3251 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 3252 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 3253 b = (Mat_SeqAIJ*)B->data; 3254 3255 b->free_a = PETSC_TRUE; 3256 b->free_ij = PETSC_TRUE; 3257 b->singlemalloc = PETSC_FALSE; 3258 3259 b->a = ba; 3260 b->j = bj; 3261 b->i = bi; 3262 b->diag = bdiag; 3263 b->ilen = 0; 3264 b->imax = 0; 3265 b->row = isrow; 3266 b->col = iscol; 3267 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3268 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3269 b->icol = isicol; 3270 3271 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 3272 ierr = PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3273 b->maxnz = nnz_max; 3274 3275 B->factortype = MAT_FACTOR_ILUDT; 3276 B->info.factor_mallocs = 0; 3277 B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 3278 /* ------- end of symbolic factorization ---------*/ 3279 3280 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3281 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3282 ics = ic; 3283 3284 /* linked list for storing column indices of the active row */ 3285 nlnk = n + 1; 3286 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3287 3288 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3289 ierr = PetscMalloc2(n,&im,n,&jtmp);CHKERRQ(ierr); 3290 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3291 ierr = PetscMalloc2(n,&rtmp,n,&vtmp);CHKERRQ(ierr); 3292 ierr = PetscArrayzero(rtmp,n);CHKERRQ(ierr); 3293 3294 bi[0] = 0; 3295 bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */ 3296 bdiag_rev[n] = bdiag[0]; 3297 bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */ 3298 for (i=0; i<n; i++) { 3299 /* copy initial fill into linked list */ 3300 nzi = ai[r[i]+1] - ai[r[i]]; 3301 if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 3302 nzi_al = adiag[r[i]] - ai[r[i]]; 3303 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 3304 ajtmp = aj + ai[r[i]]; 3305 ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3306 3307 /* load in initial (unfactored row) */ 3308 aatmp = a->a + ai[r[i]]; 3309 for (j=0; j<nzi; j++) { 3310 rtmp[ics[*ajtmp++]] = *aatmp++; 3311 } 3312 3313 /* add pivot rows into linked list */ 3314 row = lnk[n]; 3315 while (row < i) { 3316 nzi_bl = bi[row+1] - bi[row] + 1; 3317 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 3318 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 3319 nzi += nlnk; 3320 row = lnk[row]; 3321 } 3322 3323 /* copy data from lnk into jtmp, then initialize lnk */ 3324 ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 3325 3326 /* numerical factorization */ 3327 bjtmp = jtmp; 3328 row = *bjtmp++; /* 1st pivot row */ 3329 while (row < i) { 3330 pc = rtmp + row; 3331 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3332 multiplier = (*pc) * (*pv); 3333 *pc = multiplier; 3334 if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 3335 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3336 pv = ba + bdiag[row+1] + 1; 3337 /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */ 3338 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3339 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3340 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3341 } 3342 row = *bjtmp++; 3343 } 3344 3345 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3346 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3347 nzi_bl = 0; j = 0; 3348 while (jtmp[j] < i) { /* Note: jtmp is sorted */ 3349 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3350 nzi_bl++; j++; 3351 } 3352 nzi_bu = nzi - nzi_bl -1; 3353 while (j < nzi) { 3354 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3355 j++; 3356 } 3357 3358 bjtmp = bj + bi[i]; 3359 batmp = ba + bi[i]; 3360 /* apply level dropping rule to L part */ 3361 ncut = nzi_al + dtcount; 3362 if (ncut < nzi_bl) { 3363 ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 3364 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 3365 } else { 3366 ncut = nzi_bl; 3367 } 3368 for (j=0; j<ncut; j++) { 3369 bjtmp[j] = jtmp[j]; 3370 batmp[j] = vtmp[j]; 3371 /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */ 3372 } 3373 bi[i+1] = bi[i] + ncut; 3374 nzi = ncut + 1; 3375 3376 /* apply level dropping rule to U part */ 3377 ncut = nzi_au + dtcount; 3378 if (ncut < nzi_bu) { 3379 ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 3380 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 3381 } else { 3382 ncut = nzi_bu; 3383 } 3384 nzi += ncut; 3385 3386 /* mark bdiagonal */ 3387 bdiag[i+1] = bdiag[i] - (ncut + 1); 3388 bdiag_rev[n-i-1] = bdiag[i+1]; 3389 bi[2*n - i] = bi[2*n - i +1] - (ncut + 1); 3390 bjtmp = bj + bdiag[i]; 3391 batmp = ba + bdiag[i]; 3392 *bjtmp = i; 3393 *batmp = diag_tmp; /* rtmp[i]; */ 3394 if (*batmp == 0.0) { 3395 *batmp = dt+shift; 3396 /* printf(" row %d add shift %g\n",i,shift); */ 3397 } 3398 *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */ 3399 /* printf(" (%d,%g),",*bjtmp,*batmp); */ 3400 3401 bjtmp = bj + bdiag[i+1]+1; 3402 batmp = ba + bdiag[i+1]+1; 3403 for (k=0; k<ncut; k++) { 3404 bjtmp[k] = jtmp[nzi_bl+1+k]; 3405 batmp[k] = vtmp[nzi_bl+1+k]; 3406 /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */ 3407 } 3408 /* printf("\n"); */ 3409 3410 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3411 /* 3412 printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]); 3413 printf(" ----------------------------\n"); 3414 */ 3415 } /* for (i=0; i<n; i++) */ 3416 /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */ 3417 if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 3418 3419 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3420 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3421 3422 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3423 ierr = PetscFree2(im,jtmp);CHKERRQ(ierr); 3424 ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr); 3425 ierr = PetscFree(bdiag_rev);CHKERRQ(ierr); 3426 3427 ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 3428 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3429 3430 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3431 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 3432 if (row_identity && icol_identity) { 3433 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3434 } else { 3435 B->ops->solve = MatSolve_SeqAIJ; 3436 } 3437 3438 B->ops->solveadd = 0; 3439 B->ops->solvetranspose = 0; 3440 B->ops->solvetransposeadd = 0; 3441 B->ops->matsolve = 0; 3442 B->assembled = PETSC_TRUE; 3443 B->preallocated = PETSC_TRUE; 3444 PetscFunctionReturn(0); 3445 } 3446 3447 /* a wraper of MatILUDTFactor_SeqAIJ() */ 3448 /* 3449 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3450 */ 3451 3452 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 3453 { 3454 PetscErrorCode ierr; 3455 3456 PetscFunctionBegin; 3457 ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr); 3458 PetscFunctionReturn(0); 3459 } 3460 3461 /* 3462 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3463 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3464 */ 3465 /* 3466 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3467 */ 3468 3469 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 3470 { 3471 Mat C =fact; 3472 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 3473 IS isrow = b->row,isicol = b->icol; 3474 PetscErrorCode ierr; 3475 const PetscInt *r,*ic,*ics; 3476 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 3477 PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 3478 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 3479 PetscReal dt=info->dt,shift=info->shiftamount; 3480 PetscBool row_identity, col_identity; 3481 3482 PetscFunctionBegin; 3483 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3484 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3485 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 3486 ics = ic; 3487 3488 for (i=0; i<n; i++) { 3489 /* initialize rtmp array */ 3490 nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 3491 bjtmp = bj + bi[i]; 3492 for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 3493 rtmp[i] = 0.0; 3494 nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 3495 bjtmp = bj + bdiag[i+1] + 1; 3496 for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 3497 3498 /* load in initial unfactored row of A */ 3499 /* printf("row %d\n",i); */ 3500 nz = ai[r[i]+1] - ai[r[i]]; 3501 ajtmp = aj + ai[r[i]]; 3502 v = aa + ai[r[i]]; 3503 for (j=0; j<nz; j++) { 3504 rtmp[ics[*ajtmp++]] = v[j]; 3505 /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */ 3506 } 3507 /* printf("\n"); */ 3508 3509 /* numerical factorization */ 3510 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3511 nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 3512 k = 0; 3513 while (k < nzl) { 3514 row = *bjtmp++; 3515 /* printf(" prow %d\n",row); */ 3516 pc = rtmp + row; 3517 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3518 multiplier = (*pc) * (*pv); 3519 *pc = multiplier; 3520 if (PetscAbsScalar(multiplier) > dt) { 3521 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3522 pv = b->a + bdiag[row+1] + 1; 3523 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3524 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3525 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3526 } 3527 k++; 3528 } 3529 3530 /* finished row so stick it into b->a */ 3531 /* L-part */ 3532 pv = b->a + bi[i]; 3533 pj = bj + bi[i]; 3534 nzl = bi[i+1] - bi[i]; 3535 for (j=0; j<nzl; j++) { 3536 pv[j] = rtmp[pj[j]]; 3537 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3538 } 3539 3540 /* diagonal: invert diagonal entries for simplier triangular solves */ 3541 if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 3542 b->a[bdiag[i]] = 1.0/rtmp[i]; 3543 /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */ 3544 3545 /* U-part */ 3546 pv = b->a + bdiag[i+1] + 1; 3547 pj = bj + bdiag[i+1] + 1; 3548 nzu = bdiag[i] - bdiag[i+1] - 1; 3549 for (j=0; j<nzu; j++) { 3550 pv[j] = rtmp[pj[j]]; 3551 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3552 } 3553 /* printf("\n"); */ 3554 } 3555 3556 ierr = PetscFree(rtmp);CHKERRQ(ierr); 3557 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3558 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3559 3560 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3561 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 3562 if (row_identity && col_identity) { 3563 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3564 } else { 3565 C->ops->solve = MatSolve_SeqAIJ; 3566 } 3567 C->ops->solveadd = 0; 3568 C->ops->solvetranspose = 0; 3569 C->ops->solvetransposeadd = 0; 3570 C->ops->matsolve = 0; 3571 C->assembled = PETSC_TRUE; 3572 C->preallocated = PETSC_TRUE; 3573 3574 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 3575 PetscFunctionReturn(0); 3576 } 3577