1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscbt.h> 5 #include <../src/mat/utils/freespace.h> 6 7 /* 8 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 9 10 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 11 */ 12 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 13 { 14 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data; 15 PetscErrorCode ierr; 16 PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order; 17 const PetscInt *ai = a->i, *aj = a->j; 18 const PetscScalar *aa = a->a; 19 PetscBool *done; 20 PetscReal best,past = 0,future; 21 22 PetscFunctionBegin; 23 /* pick initial row */ 24 best = -1; 25 for (i=0; i<n; i++) { 26 future = 0.0; 27 for (j=ai[i]; j<ai[i+1]; j++) { 28 if (aj[j] != i) future += PetscAbsScalar(aa[j]); 29 else past = PetscAbsScalar(aa[j]); 30 } 31 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 32 if (past/future > best) { 33 best = past/future; 34 current = i; 35 } 36 } 37 38 ierr = PetscMalloc1(n,&done);CHKERRQ(ierr); 39 ierr = PetscArrayzero(done,n);CHKERRQ(ierr); 40 ierr = PetscMalloc1(n,&order);CHKERRQ(ierr); 41 order[0] = current; 42 for (i=0; i<n-1; i++) { 43 done[current] = PETSC_TRUE; 44 best = -1; 45 /* loop over all neighbors of current pivot */ 46 for (j=ai[current]; j<ai[current+1]; j++) { 47 jj = aj[j]; 48 if (done[jj]) continue; 49 /* loop over columns of potential next row computing weights for below and above diagonal */ 50 past = future = 0.0; 51 for (k=ai[jj]; k<ai[jj+1]; k++) { 52 kk = aj[k]; 53 if (done[kk]) past += PetscAbsScalar(aa[k]); 54 else if (kk != jj) future += PetscAbsScalar(aa[k]); 55 } 56 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 57 if (past/future > best) { 58 best = past/future; 59 newcurrent = jj; 60 } 61 } 62 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 63 best = -1; 64 for (k=0; k<n; k++) { 65 if (done[k]) continue; 66 future = 0.0; 67 past = 0.0; 68 for (j=ai[k]; j<ai[k+1]; j++) { 69 kk = aj[j]; 70 if (done[kk]) past += PetscAbsScalar(aa[j]); 71 else if (kk != k) future += PetscAbsScalar(aa[j]); 72 } 73 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 74 if (past/future > best) { 75 best = past/future; 76 newcurrent = k; 77 } 78 } 79 } 80 if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current"); 81 current = newcurrent; 82 order[i+1] = current; 83 } 84 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr); 85 *icol = *irow; 86 ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr); 87 ierr = PetscFree(done);CHKERRQ(ierr); 88 ierr = PetscFree(order);CHKERRQ(ierr); 89 PetscFunctionReturn(0); 90 } 91 92 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 93 { 94 PetscInt n = A->rmap->n; 95 PetscErrorCode ierr; 96 97 PetscFunctionBegin; 98 #if defined(PETSC_USE_COMPLEX) 99 if (A->hermitian && !A->symmetric && (ftype == MAT_FACTOR_CHOLESKY||ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY or ICC Factor is not supported"); 100 #endif 101 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 102 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 103 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 104 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 105 106 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 107 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 108 109 ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr); 110 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 111 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 112 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 113 114 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 115 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 116 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 117 (*B)->factortype = ftype; 118 119 ierr = PetscFree((*B)->solvertype);CHKERRQ(ierr); 120 ierr = PetscStrallocpy(MATSOLVERPETSC,&(*B)->solvertype);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 125 { 126 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 127 IS isicol; 128 PetscErrorCode ierr; 129 const PetscInt *r,*ic; 130 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 131 PetscInt *bi,*bj,*ajtmp; 132 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 133 PetscReal f; 134 PetscInt nlnk,*lnk,k,**bi_ptr; 135 PetscFreeSpaceList free_space=NULL,current_space=NULL; 136 PetscBT lnkbt; 137 PetscBool missing; 138 139 PetscFunctionBegin; 140 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 141 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 142 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 143 144 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 145 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 146 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 147 148 /* get new row pointers */ 149 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 150 bi[0] = 0; 151 152 /* bdiag is location of diagonal in factor */ 153 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 154 bdiag[0] = 0; 155 156 /* linked list for storing column indices of the active row */ 157 nlnk = n + 1; 158 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 159 160 ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr); 161 162 /* initial FreeSpace size is f*(ai[n]+1) */ 163 f = info->fill; 164 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); 165 current_space = free_space; 166 167 for (i=0; i<n; i++) { 168 /* copy previous fill into linked list */ 169 nzi = 0; 170 nnz = ai[r[i]+1] - ai[r[i]]; 171 ajtmp = aj + ai[r[i]]; 172 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 173 nzi += nlnk; 174 175 /* add pivot rows into linked list */ 176 row = lnk[n]; 177 while (row < i) { 178 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 179 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 180 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 181 nzi += nlnk; 182 row = lnk[row]; 183 } 184 bi[i+1] = bi[i] + nzi; 185 im[i] = nzi; 186 187 /* mark bdiag */ 188 nzbd = 0; 189 nnz = nzi; 190 k = lnk[n]; 191 while (nnz-- && k < i) { 192 nzbd++; 193 k = lnk[k]; 194 } 195 bdiag[i] = bi[i] + nzbd; 196 197 /* if free space is not available, make more free space */ 198 if (current_space->local_remaining<nzi) { 199 nnz = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */ 200 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 201 reallocs++; 202 } 203 204 /* copy data into free space, then initialize lnk */ 205 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 206 207 bi_ptr[i] = current_space->array; 208 current_space->array += nzi; 209 current_space->local_used += nzi; 210 current_space->local_remaining -= nzi; 211 } 212 #if defined(PETSC_USE_INFO) 213 if (ai[n] != 0) { 214 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 215 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 216 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 217 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); 218 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 219 } else { 220 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 221 } 222 #endif 223 224 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 225 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 226 227 /* destroy list of free space and other temporary array(s) */ 228 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 229 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 230 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 231 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 232 233 /* put together the new matrix */ 234 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 235 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 236 b = (Mat_SeqAIJ*)(B)->data; 237 238 b->free_a = PETSC_TRUE; 239 b->free_ij = PETSC_TRUE; 240 b->singlemalloc = PETSC_FALSE; 241 242 ierr = PetscMalloc1(bi[n]+1,&b->a);CHKERRQ(ierr); 243 b->j = bj; 244 b->i = bi; 245 b->diag = bdiag; 246 b->ilen = 0; 247 b->imax = 0; 248 b->row = isrow; 249 b->col = iscol; 250 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 251 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 252 b->icol = isicol; 253 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 254 255 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 256 ierr = PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 257 b->maxnz = b->nz = bi[n]; 258 259 (B)->factortype = MAT_FACTOR_LU; 260 (B)->info.factor_mallocs = reallocs; 261 (B)->info.fill_ratio_given = f; 262 263 if (ai[n]) { 264 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 265 } else { 266 (B)->info.fill_ratio_needed = 0.0; 267 } 268 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 269 if (a->inode.size) { 270 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 271 } 272 PetscFunctionReturn(0); 273 } 274 275 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 276 { 277 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 278 IS isicol; 279 PetscErrorCode ierr; 280 const PetscInt *r,*ic,*ai=a->i,*aj=a->j,*ajtmp; 281 PetscInt i,n=A->rmap->n; 282 PetscInt *bi,*bj; 283 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 284 PetscReal f; 285 PetscInt nlnk,*lnk,k,**bi_ptr; 286 PetscFreeSpaceList free_space=NULL,current_space=NULL; 287 PetscBT lnkbt; 288 PetscBool missing; 289 290 PetscFunctionBegin; 291 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 292 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 293 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 294 295 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 296 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 297 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 298 299 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 300 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 301 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 302 bi[0] = bdiag[0] = 0; 303 304 /* linked list for storing column indices of the active row */ 305 nlnk = n + 1; 306 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 307 308 ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr); 309 310 /* initial FreeSpace size is f*(ai[n]+1) */ 311 f = info->fill; 312 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); 313 current_space = free_space; 314 315 for (i=0; i<n; i++) { 316 /* copy previous fill into linked list */ 317 nzi = 0; 318 nnz = ai[r[i]+1] - ai[r[i]]; 319 ajtmp = aj + ai[r[i]]; 320 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 321 nzi += nlnk; 322 323 /* add pivot rows into linked list */ 324 row = lnk[n]; 325 while (row < i) { 326 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 327 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 328 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 329 nzi += nlnk; 330 row = lnk[row]; 331 } 332 bi[i+1] = bi[i] + nzi; 333 im[i] = nzi; 334 335 /* mark bdiag */ 336 nzbd = 0; 337 nnz = nzi; 338 k = lnk[n]; 339 while (nnz-- && k < i) { 340 nzbd++; 341 k = lnk[k]; 342 } 343 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 344 345 /* if free space is not available, make more free space */ 346 if (current_space->local_remaining<nzi) { 347 /* estimated additional space needed */ 348 nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(n-1,nzi)); 349 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 350 reallocs++; 351 } 352 353 /* copy data into free space, then initialize lnk */ 354 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 355 356 bi_ptr[i] = current_space->array; 357 current_space->array += nzi; 358 current_space->local_used += nzi; 359 current_space->local_remaining -= nzi; 360 } 361 362 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 363 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 364 365 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 366 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 367 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 368 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 369 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 370 371 /* put together the new matrix */ 372 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 373 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 374 b = (Mat_SeqAIJ*)(B)->data; 375 376 b->free_a = PETSC_TRUE; 377 b->free_ij = PETSC_TRUE; 378 b->singlemalloc = PETSC_FALSE; 379 380 ierr = PetscMalloc1(bdiag[0]+1,&b->a);CHKERRQ(ierr); 381 382 b->j = bj; 383 b->i = bi; 384 b->diag = bdiag; 385 b->ilen = 0; 386 b->imax = 0; 387 b->row = isrow; 388 b->col = iscol; 389 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 390 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 391 b->icol = isicol; 392 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 393 394 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 395 ierr = PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 396 b->maxnz = b->nz = bdiag[0]+1; 397 398 B->factortype = MAT_FACTOR_LU; 399 B->info.factor_mallocs = reallocs; 400 B->info.fill_ratio_given = f; 401 402 if (ai[n]) { 403 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 404 } else { 405 B->info.fill_ratio_needed = 0.0; 406 } 407 #if defined(PETSC_USE_INFO) 408 if (ai[n] != 0) { 409 PetscReal af = B->info.fill_ratio_needed; 410 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 411 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 412 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); 413 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 414 } else { 415 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 416 } 417 #endif 418 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 419 if (a->inode.size) { 420 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 421 } 422 ierr = MatSeqAIJCheckInode_FactorLU(B);CHKERRQ(ierr); 423 PetscFunctionReturn(0); 424 } 425 426 /* 427 Trouble in factorization, should we dump the original matrix? 428 */ 429 PetscErrorCode MatFactorDumpMatrix(Mat A) 430 { 431 PetscErrorCode ierr; 432 PetscBool flg = PETSC_FALSE; 433 434 PetscFunctionBegin; 435 ierr = PetscOptionsGetBool(((PetscObject)A)->options,NULL,"-mat_factor_dump_on_error",&flg,NULL);CHKERRQ(ierr); 436 if (flg) { 437 PetscViewer viewer; 438 char filename[PETSC_MAX_PATH_LEN]; 439 440 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 441 ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 442 ierr = MatView(A,viewer);CHKERRQ(ierr); 443 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 444 } 445 PetscFunctionReturn(0); 446 } 447 448 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 449 { 450 Mat C =B; 451 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 452 IS isrow = b->row,isicol = b->icol; 453 PetscErrorCode ierr; 454 const PetscInt *r,*ic,*ics; 455 const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag; 456 PetscInt i,j,k,nz,nzL,row,*pj; 457 const PetscInt *ajtmp,*bjtmp; 458 MatScalar *rtmp,*pc,multiplier,*pv; 459 const MatScalar *aa=a->a,*v; 460 PetscBool row_identity,col_identity; 461 FactorShiftCtx sctx; 462 const PetscInt *ddiag; 463 PetscReal rs; 464 MatScalar d; 465 466 PetscFunctionBegin; 467 /* MatPivotSetUp(): initialize shift context sctx */ 468 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 469 470 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 471 ddiag = a->diag; 472 sctx.shift_top = info->zeropivot; 473 for (i=0; i<n; i++) { 474 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 475 d = (aa)[ddiag[i]]; 476 rs = -PetscAbsScalar(d) - PetscRealPart(d); 477 v = aa+ai[i]; 478 nz = ai[i+1] - ai[i]; 479 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 480 if (rs>sctx.shift_top) sctx.shift_top = rs; 481 } 482 sctx.shift_top *= 1.1; 483 sctx.nshift_max = 5; 484 sctx.shift_lo = 0.; 485 sctx.shift_hi = 1.; 486 } 487 488 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 489 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 490 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 491 ics = ic; 492 493 do { 494 sctx.newshift = PETSC_FALSE; 495 for (i=0; i<n; i++) { 496 /* zero rtmp */ 497 /* L part */ 498 nz = bi[i+1] - bi[i]; 499 bjtmp = bj + bi[i]; 500 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 501 502 /* U part */ 503 nz = bdiag[i]-bdiag[i+1]; 504 bjtmp = bj + bdiag[i+1]+1; 505 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 506 507 /* load in initial (unfactored row) */ 508 nz = ai[r[i]+1] - ai[r[i]]; 509 ajtmp = aj + ai[r[i]]; 510 v = aa + ai[r[i]]; 511 for (j=0; j<nz; j++) { 512 rtmp[ics[ajtmp[j]]] = v[j]; 513 } 514 /* ZeropivotApply() */ 515 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 516 517 /* elimination */ 518 bjtmp = bj + bi[i]; 519 row = *bjtmp++; 520 nzL = bi[i+1] - bi[i]; 521 for (k=0; k < nzL; k++) { 522 pc = rtmp + row; 523 if (*pc != 0.0) { 524 pv = b->a + bdiag[row]; 525 multiplier = *pc * (*pv); 526 *pc = multiplier; 527 528 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 529 pv = b->a + bdiag[row+1]+1; 530 nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */ 531 532 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 533 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 534 } 535 row = *bjtmp++; 536 } 537 538 /* finished row so stick it into b->a */ 539 rs = 0.0; 540 /* L part */ 541 pv = b->a + bi[i]; 542 pj = b->j + bi[i]; 543 nz = bi[i+1] - bi[i]; 544 for (j=0; j<nz; j++) { 545 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 546 } 547 548 /* U part */ 549 pv = b->a + bdiag[i+1]+1; 550 pj = b->j + bdiag[i+1]+1; 551 nz = bdiag[i] - bdiag[i+1]-1; 552 for (j=0; j<nz; j++) { 553 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 554 } 555 556 sctx.rs = rs; 557 sctx.pv = rtmp[i]; 558 ierr = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr); 559 if (sctx.newshift) break; /* break for-loop */ 560 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 561 562 /* Mark diagonal and invert diagonal for simplier triangular solves */ 563 pv = b->a + bdiag[i]; 564 *pv = 1.0/rtmp[i]; 565 566 } /* endof for (i=0; i<n; i++) { */ 567 568 /* MatPivotRefine() */ 569 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 570 /* 571 * if no shift in this attempt & shifting & started shifting & can refine, 572 * then try lower shift 573 */ 574 sctx.shift_hi = sctx.shift_fraction; 575 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 576 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 577 sctx.newshift = PETSC_TRUE; 578 sctx.nshift++; 579 } 580 } while (sctx.newshift); 581 582 ierr = PetscFree(rtmp);CHKERRQ(ierr); 583 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 584 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 585 586 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 587 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 588 if (b->inode.size) { 589 C->ops->solve = MatSolve_SeqAIJ_Inode; 590 } else if (row_identity && col_identity) { 591 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 592 } else { 593 C->ops->solve = MatSolve_SeqAIJ; 594 } 595 C->ops->solveadd = MatSolveAdd_SeqAIJ; 596 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 597 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 598 C->ops->matsolve = MatMatSolve_SeqAIJ; 599 C->assembled = PETSC_TRUE; 600 C->preallocated = PETSC_TRUE; 601 602 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 603 604 /* MatShiftView(A,info,&sctx) */ 605 if (sctx.nshift) { 606 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 607 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 608 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 609 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 610 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 611 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr); 612 } 613 } 614 PetscFunctionReturn(0); 615 } 616 617 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 618 { 619 Mat C =B; 620 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 621 IS isrow = b->row,isicol = b->icol; 622 PetscErrorCode ierr; 623 const PetscInt *r,*ic,*ics; 624 PetscInt nz,row,i,j,n=A->rmap->n,diag; 625 const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 626 const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj; 627 MatScalar *pv,*rtmp,*pc,multiplier,d; 628 const MatScalar *v,*aa=a->a; 629 PetscReal rs=0.0; 630 FactorShiftCtx sctx; 631 const PetscInt *ddiag; 632 PetscBool row_identity, col_identity; 633 634 PetscFunctionBegin; 635 /* MatPivotSetUp(): initialize shift context sctx */ 636 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 637 638 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 639 ddiag = a->diag; 640 sctx.shift_top = info->zeropivot; 641 for (i=0; i<n; i++) { 642 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 643 d = (aa)[ddiag[i]]; 644 rs = -PetscAbsScalar(d) - PetscRealPart(d); 645 v = aa+ai[i]; 646 nz = ai[i+1] - ai[i]; 647 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 648 if (rs>sctx.shift_top) sctx.shift_top = rs; 649 } 650 sctx.shift_top *= 1.1; 651 sctx.nshift_max = 5; 652 sctx.shift_lo = 0.; 653 sctx.shift_hi = 1.; 654 } 655 656 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 657 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 658 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 659 ics = ic; 660 661 do { 662 sctx.newshift = PETSC_FALSE; 663 for (i=0; i<n; i++) { 664 nz = bi[i+1] - bi[i]; 665 bjtmp = bj + bi[i]; 666 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 667 668 /* load in initial (unfactored row) */ 669 nz = ai[r[i]+1] - ai[r[i]]; 670 ajtmp = aj + ai[r[i]]; 671 v = aa + ai[r[i]]; 672 for (j=0; j<nz; j++) { 673 rtmp[ics[ajtmp[j]]] = v[j]; 674 } 675 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 676 677 row = *bjtmp++; 678 while (row < i) { 679 pc = rtmp + row; 680 if (*pc != 0.0) { 681 pv = b->a + diag_offset[row]; 682 pj = b->j + diag_offset[row] + 1; 683 multiplier = *pc / *pv++; 684 *pc = multiplier; 685 nz = bi[row+1] - diag_offset[row] - 1; 686 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 687 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 688 } 689 row = *bjtmp++; 690 } 691 /* finished row so stick it into b->a */ 692 pv = b->a + bi[i]; 693 pj = b->j + bi[i]; 694 nz = bi[i+1] - bi[i]; 695 diag = diag_offset[i] - bi[i]; 696 rs = 0.0; 697 for (j=0; j<nz; j++) { 698 pv[j] = rtmp[pj[j]]; 699 rs += PetscAbsScalar(pv[j]); 700 } 701 rs -= PetscAbsScalar(pv[diag]); 702 703 sctx.rs = rs; 704 sctx.pv = pv[diag]; 705 ierr = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr); 706 if (sctx.newshift) break; 707 pv[diag] = sctx.pv; 708 } 709 710 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 711 /* 712 * if no shift in this attempt & shifting & started shifting & can refine, 713 * then try lower shift 714 */ 715 sctx.shift_hi = sctx.shift_fraction; 716 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 717 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 718 sctx.newshift = PETSC_TRUE; 719 sctx.nshift++; 720 } 721 } while (sctx.newshift); 722 723 /* invert diagonal entries for simplier triangular solves */ 724 for (i=0; i<n; i++) { 725 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 726 } 727 ierr = PetscFree(rtmp);CHKERRQ(ierr); 728 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 729 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 730 731 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 732 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 733 if (row_identity && col_identity) { 734 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 735 } else { 736 C->ops->solve = MatSolve_SeqAIJ_inplace; 737 } 738 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 739 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 740 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 741 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 742 743 C->assembled = PETSC_TRUE; 744 C->preallocated = PETSC_TRUE; 745 746 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 747 if (sctx.nshift) { 748 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 749 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 750 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 751 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 752 } 753 } 754 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 755 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 756 757 ierr = MatSeqAIJCheckInode(C);CHKERRQ(ierr); 758 PetscFunctionReturn(0); 759 } 760 761 /* 762 This routine implements inplace ILU(0) with row or/and column permutations. 763 Input: 764 A - original matrix 765 Output; 766 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 767 a->j (col index) is permuted by the inverse of colperm, then sorted 768 a->a reordered accordingly with a->j 769 a->diag (ptr to diagonal elements) is updated. 770 */ 771 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 772 { 773 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data; 774 IS isrow = a->row,isicol = a->icol; 775 PetscErrorCode ierr; 776 const PetscInt *r,*ic,*ics; 777 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 778 PetscInt *ajtmp,nz,row; 779 PetscInt *diag = a->diag,nbdiag,*pj; 780 PetscScalar *rtmp,*pc,multiplier,d; 781 MatScalar *pv,*v; 782 PetscReal rs; 783 FactorShiftCtx sctx; 784 const MatScalar *aa=a->a,*vtmp; 785 786 PetscFunctionBegin; 787 if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 788 789 /* MatPivotSetUp(): initialize shift context sctx */ 790 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 791 792 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 793 const PetscInt *ddiag = a->diag; 794 sctx.shift_top = info->zeropivot; 795 for (i=0; i<n; i++) { 796 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 797 d = (aa)[ddiag[i]]; 798 rs = -PetscAbsScalar(d) - PetscRealPart(d); 799 vtmp = aa+ai[i]; 800 nz = ai[i+1] - ai[i]; 801 for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]); 802 if (rs>sctx.shift_top) sctx.shift_top = rs; 803 } 804 sctx.shift_top *= 1.1; 805 sctx.nshift_max = 5; 806 sctx.shift_lo = 0.; 807 sctx.shift_hi = 1.; 808 } 809 810 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 811 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 812 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 813 ierr = PetscArrayzero(rtmp,n+1);CHKERRQ(ierr); 814 ics = ic; 815 816 #if defined(MV) 817 sctx.shift_top = 0.; 818 sctx.nshift_max = 0; 819 sctx.shift_lo = 0.; 820 sctx.shift_hi = 0.; 821 sctx.shift_fraction = 0.; 822 823 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 824 sctx.shift_top = 0.; 825 for (i=0; i<n; i++) { 826 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 827 d = (a->a)[diag[i]]; 828 rs = -PetscAbsScalar(d) - PetscRealPart(d); 829 v = a->a+ai[i]; 830 nz = ai[i+1] - ai[i]; 831 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 832 if (rs>sctx.shift_top) sctx.shift_top = rs; 833 } 834 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 835 sctx.shift_top *= 1.1; 836 sctx.nshift_max = 5; 837 sctx.shift_lo = 0.; 838 sctx.shift_hi = 1.; 839 } 840 841 sctx.shift_amount = 0.; 842 sctx.nshift = 0; 843 #endif 844 845 do { 846 sctx.newshift = PETSC_FALSE; 847 for (i=0; i<n; i++) { 848 /* load in initial unfactored row */ 849 nz = ai[r[i]+1] - ai[r[i]]; 850 ajtmp = aj + ai[r[i]]; 851 v = a->a + ai[r[i]]; 852 /* sort permuted ajtmp and values v accordingly */ 853 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 854 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 855 856 diag[r[i]] = ai[r[i]]; 857 for (j=0; j<nz; j++) { 858 rtmp[ajtmp[j]] = v[j]; 859 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 860 } 861 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 862 863 row = *ajtmp++; 864 while (row < i) { 865 pc = rtmp + row; 866 if (*pc != 0.0) { 867 pv = a->a + diag[r[row]]; 868 pj = aj + diag[r[row]] + 1; 869 870 multiplier = *pc / *pv++; 871 *pc = multiplier; 872 nz = ai[r[row]+1] - diag[r[row]] - 1; 873 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 874 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 875 } 876 row = *ajtmp++; 877 } 878 /* finished row so overwrite it onto a->a */ 879 pv = a->a + ai[r[i]]; 880 pj = aj + ai[r[i]]; 881 nz = ai[r[i]+1] - ai[r[i]]; 882 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 883 884 rs = 0.0; 885 for (j=0; j<nz; j++) { 886 pv[j] = rtmp[pj[j]]; 887 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 888 } 889 890 sctx.rs = rs; 891 sctx.pv = pv[nbdiag]; 892 ierr = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr); 893 if (sctx.newshift) break; 894 pv[nbdiag] = sctx.pv; 895 } 896 897 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 898 /* 899 * if no shift in this attempt & shifting & started shifting & can refine, 900 * then try lower shift 901 */ 902 sctx.shift_hi = sctx.shift_fraction; 903 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 904 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 905 sctx.newshift = PETSC_TRUE; 906 sctx.nshift++; 907 } 908 } while (sctx.newshift); 909 910 /* invert diagonal entries for simplier triangular solves */ 911 for (i=0; i<n; i++) { 912 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 913 } 914 915 ierr = PetscFree(rtmp);CHKERRQ(ierr); 916 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 917 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 918 919 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 920 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 921 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 922 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 923 924 A->assembled = PETSC_TRUE; 925 A->preallocated = PETSC_TRUE; 926 927 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 928 if (sctx.nshift) { 929 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 930 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 931 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 932 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 933 } 934 } 935 PetscFunctionReturn(0); 936 } 937 938 /* ----------------------------------------------------------- */ 939 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 940 { 941 PetscErrorCode ierr; 942 Mat C; 943 944 PetscFunctionBegin; 945 ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 946 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 947 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 948 949 A->ops->solve = C->ops->solve; 950 A->ops->solvetranspose = C->ops->solvetranspose; 951 952 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 953 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 954 PetscFunctionReturn(0); 955 } 956 /* ----------------------------------------------------------- */ 957 958 959 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 960 { 961 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 962 IS iscol = a->col,isrow = a->row; 963 PetscErrorCode ierr; 964 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 965 PetscInt nz; 966 const PetscInt *rout,*cout,*r,*c; 967 PetscScalar *x,*tmp,*tmps,sum; 968 const PetscScalar *b; 969 const MatScalar *aa = a->a,*v; 970 971 PetscFunctionBegin; 972 if (!n) PetscFunctionReturn(0); 973 974 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 975 ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr); 976 tmp = a->solve_work; 977 978 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 979 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 980 981 /* forward solve the lower triangular */ 982 tmp[0] = b[*r++]; 983 tmps = tmp; 984 for (i=1; i<n; i++) { 985 v = aa + ai[i]; 986 vi = aj + ai[i]; 987 nz = a->diag[i] - ai[i]; 988 sum = b[*r++]; 989 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 990 tmp[i] = sum; 991 } 992 993 /* backward solve the upper triangular */ 994 for (i=n-1; i>=0; i--) { 995 v = aa + a->diag[i] + 1; 996 vi = aj + a->diag[i] + 1; 997 nz = ai[i+1] - a->diag[i] - 1; 998 sum = tmp[i]; 999 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1000 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 1001 } 1002 1003 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1004 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1005 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1006 ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr); 1007 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1008 PetscFunctionReturn(0); 1009 } 1010 1011 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X) 1012 { 1013 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1014 IS iscol = a->col,isrow = a->row; 1015 PetscErrorCode ierr; 1016 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1017 PetscInt nz,neq; 1018 const PetscInt *rout,*cout,*r,*c; 1019 PetscScalar *x,*tmp,*tmps,sum; 1020 const PetscScalar *aa = a->a,*v; 1021 const PetscScalar *b; 1022 PetscBool bisdense,xisdense; 1023 1024 PetscFunctionBegin; 1025 if (!n) PetscFunctionReturn(0); 1026 1027 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1028 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1029 if (X != B) { 1030 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1031 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1032 } 1033 1034 ierr = MatDenseGetArrayRead(B,&b);CHKERRQ(ierr); 1035 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1036 1037 tmp = a->solve_work; 1038 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1039 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1040 1041 for (neq=0; neq<B->cmap->n; neq++) { 1042 /* forward solve the lower triangular */ 1043 tmp[0] = b[r[0]]; 1044 tmps = tmp; 1045 for (i=1; i<n; i++) { 1046 v = aa + ai[i]; 1047 vi = aj + ai[i]; 1048 nz = a->diag[i] - ai[i]; 1049 sum = b[r[i]]; 1050 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1051 tmp[i] = sum; 1052 } 1053 /* backward solve the upper triangular */ 1054 for (i=n-1; i>=0; i--) { 1055 v = aa + a->diag[i] + 1; 1056 vi = aj + a->diag[i] + 1; 1057 nz = ai[i+1] - a->diag[i] - 1; 1058 sum = tmp[i]; 1059 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1060 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 1061 } 1062 1063 b += n; 1064 x += n; 1065 } 1066 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1067 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1068 ierr = MatDenseRestoreArrayRead(B,&b);CHKERRQ(ierr); 1069 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1070 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1071 PetscFunctionReturn(0); 1072 } 1073 1074 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 1075 { 1076 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1077 IS iscol = a->col,isrow = a->row; 1078 PetscErrorCode ierr; 1079 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1080 PetscInt nz,neq; 1081 const PetscInt *rout,*cout,*r,*c; 1082 PetscScalar *x,*tmp,sum; 1083 const PetscScalar *b; 1084 const PetscScalar *aa = a->a,*v; 1085 PetscBool bisdense,xisdense; 1086 1087 PetscFunctionBegin; 1088 if (!n) PetscFunctionReturn(0); 1089 1090 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1091 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1092 if (X != B) { 1093 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1094 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1095 } 1096 1097 ierr = MatDenseGetArrayRead(B,&b);CHKERRQ(ierr); 1098 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1099 1100 tmp = a->solve_work; 1101 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1102 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1103 1104 for (neq=0; neq<B->cmap->n; neq++) { 1105 /* forward solve the lower triangular */ 1106 tmp[0] = b[r[0]]; 1107 v = aa; 1108 vi = aj; 1109 for (i=1; i<n; i++) { 1110 nz = ai[i+1] - ai[i]; 1111 sum = b[r[i]]; 1112 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1113 tmp[i] = sum; 1114 v += nz; vi += nz; 1115 } 1116 1117 /* backward solve the upper triangular */ 1118 for (i=n-1; i>=0; i--) { 1119 v = aa + adiag[i+1]+1; 1120 vi = aj + adiag[i+1]+1; 1121 nz = adiag[i]-adiag[i+1]-1; 1122 sum = tmp[i]; 1123 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1124 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 1125 } 1126 1127 b += n; 1128 x += n; 1129 } 1130 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1131 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1132 ierr = MatDenseRestoreArrayRead(B,&b);CHKERRQ(ierr); 1133 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1134 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1135 PetscFunctionReturn(0); 1136 } 1137 1138 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 1139 { 1140 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1141 IS iscol = a->col,isrow = a->row; 1142 PetscErrorCode ierr; 1143 const PetscInt *r,*c,*rout,*cout; 1144 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1145 PetscInt nz,row; 1146 PetscScalar *x,*tmp,*tmps,sum; 1147 const PetscScalar *b; 1148 const MatScalar *aa = a->a,*v; 1149 1150 PetscFunctionBegin; 1151 if (!n) PetscFunctionReturn(0); 1152 1153 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1154 ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr); 1155 tmp = a->solve_work; 1156 1157 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1158 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1159 1160 /* forward solve the lower triangular */ 1161 tmp[0] = b[*r++]; 1162 tmps = tmp; 1163 for (row=1; row<n; row++) { 1164 i = rout[row]; /* permuted row */ 1165 v = aa + ai[i]; 1166 vi = aj + ai[i]; 1167 nz = a->diag[i] - ai[i]; 1168 sum = b[*r++]; 1169 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1170 tmp[row] = sum; 1171 } 1172 1173 /* backward solve the upper triangular */ 1174 for (row=n-1; row>=0; row--) { 1175 i = rout[row]; /* permuted row */ 1176 v = aa + a->diag[i] + 1; 1177 vi = aj + a->diag[i] + 1; 1178 nz = ai[i+1] - a->diag[i] - 1; 1179 sum = tmp[row]; 1180 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1181 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 1182 } 1183 1184 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1185 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1186 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1187 ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr); 1188 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1189 PetscFunctionReturn(0); 1190 } 1191 1192 /* ----------------------------------------------------------- */ 1193 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1194 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1195 { 1196 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1197 PetscErrorCode ierr; 1198 PetscInt n = A->rmap->n; 1199 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag; 1200 PetscScalar *x; 1201 const PetscScalar *b; 1202 const MatScalar *aa = a->a; 1203 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1204 PetscInt adiag_i,i,nz,ai_i; 1205 const PetscInt *vi; 1206 const MatScalar *v; 1207 PetscScalar sum; 1208 #endif 1209 1210 PetscFunctionBegin; 1211 if (!n) PetscFunctionReturn(0); 1212 1213 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1214 ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr); 1215 1216 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1217 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 1218 #else 1219 /* forward solve the lower triangular */ 1220 x[0] = b[0]; 1221 for (i=1; i<n; i++) { 1222 ai_i = ai[i]; 1223 v = aa + ai_i; 1224 vi = aj + ai_i; 1225 nz = adiag[i] - ai_i; 1226 sum = b[i]; 1227 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1228 x[i] = sum; 1229 } 1230 1231 /* backward solve the upper triangular */ 1232 for (i=n-1; i>=0; i--) { 1233 adiag_i = adiag[i]; 1234 v = aa + adiag_i + 1; 1235 vi = aj + adiag_i + 1; 1236 nz = ai[i+1] - adiag_i - 1; 1237 sum = x[i]; 1238 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1239 x[i] = sum*aa[adiag_i]; 1240 } 1241 #endif 1242 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1243 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1244 ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx) 1249 { 1250 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1251 IS iscol = a->col,isrow = a->row; 1252 PetscErrorCode ierr; 1253 PetscInt i, n = A->rmap->n,j; 1254 PetscInt nz; 1255 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j; 1256 PetscScalar *x,*tmp,sum; 1257 const PetscScalar *b; 1258 const MatScalar *aa = a->a,*v; 1259 1260 PetscFunctionBegin; 1261 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1262 1263 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1264 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1265 tmp = a->solve_work; 1266 1267 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1268 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1269 1270 /* forward solve the lower triangular */ 1271 tmp[0] = b[*r++]; 1272 for (i=1; i<n; i++) { 1273 v = aa + ai[i]; 1274 vi = aj + ai[i]; 1275 nz = a->diag[i] - ai[i]; 1276 sum = b[*r++]; 1277 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1278 tmp[i] = sum; 1279 } 1280 1281 /* backward solve the upper triangular */ 1282 for (i=n-1; i>=0; i--) { 1283 v = aa + a->diag[i] + 1; 1284 vi = aj + a->diag[i] + 1; 1285 nz = ai[i+1] - a->diag[i] - 1; 1286 sum = tmp[i]; 1287 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1288 tmp[i] = sum*aa[a->diag[i]]; 1289 x[*c--] += tmp[i]; 1290 } 1291 1292 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1293 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1294 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1295 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1296 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1297 PetscFunctionReturn(0); 1298 } 1299 1300 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1301 { 1302 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1303 IS iscol = a->col,isrow = a->row; 1304 PetscErrorCode ierr; 1305 PetscInt i, n = A->rmap->n,j; 1306 PetscInt nz; 1307 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1308 PetscScalar *x,*tmp,sum; 1309 const PetscScalar *b; 1310 const MatScalar *aa = a->a,*v; 1311 1312 PetscFunctionBegin; 1313 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1314 1315 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1316 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1317 tmp = a->solve_work; 1318 1319 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1320 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1321 1322 /* forward solve the lower triangular */ 1323 tmp[0] = b[r[0]]; 1324 v = aa; 1325 vi = aj; 1326 for (i=1; i<n; i++) { 1327 nz = ai[i+1] - ai[i]; 1328 sum = b[r[i]]; 1329 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1330 tmp[i] = sum; 1331 v += nz; 1332 vi += nz; 1333 } 1334 1335 /* backward solve the upper triangular */ 1336 v = aa + adiag[n-1]; 1337 vi = aj + adiag[n-1]; 1338 for (i=n-1; i>=0; i--) { 1339 nz = adiag[i] - adiag[i+1] - 1; 1340 sum = tmp[i]; 1341 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1342 tmp[i] = sum*v[nz]; 1343 x[c[i]] += tmp[i]; 1344 v += nz+1; vi += nz+1; 1345 } 1346 1347 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1348 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1349 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1350 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1351 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1352 PetscFunctionReturn(0); 1353 } 1354 1355 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 1356 { 1357 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1358 IS iscol = a->col,isrow = a->row; 1359 PetscErrorCode ierr; 1360 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1361 PetscInt i,n = A->rmap->n,j; 1362 PetscInt nz; 1363 PetscScalar *x,*tmp,s1; 1364 const MatScalar *aa = a->a,*v; 1365 const PetscScalar *b; 1366 1367 PetscFunctionBegin; 1368 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1369 ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr); 1370 tmp = a->solve_work; 1371 1372 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1373 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1374 1375 /* copy the b into temp work space according to permutation */ 1376 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1377 1378 /* forward solve the U^T */ 1379 for (i=0; i<n; i++) { 1380 v = aa + diag[i]; 1381 vi = aj + diag[i] + 1; 1382 nz = ai[i+1] - diag[i] - 1; 1383 s1 = tmp[i]; 1384 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1385 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1386 tmp[i] = s1; 1387 } 1388 1389 /* backward solve the L^T */ 1390 for (i=n-1; i>=0; i--) { 1391 v = aa + diag[i] - 1; 1392 vi = aj + diag[i] - 1; 1393 nz = diag[i] - ai[i]; 1394 s1 = tmp[i]; 1395 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1396 } 1397 1398 /* copy tmp into x according to permutation */ 1399 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1400 1401 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1402 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1403 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1404 ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr); 1405 1406 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1407 PetscFunctionReturn(0); 1408 } 1409 1410 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1411 { 1412 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1413 IS iscol = a->col,isrow = a->row; 1414 PetscErrorCode ierr; 1415 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1416 PetscInt i,n = A->rmap->n,j; 1417 PetscInt nz; 1418 PetscScalar *x,*tmp,s1; 1419 const MatScalar *aa = a->a,*v; 1420 const PetscScalar *b; 1421 1422 PetscFunctionBegin; 1423 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1424 ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr); 1425 tmp = a->solve_work; 1426 1427 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1428 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1429 1430 /* copy the b into temp work space according to permutation */ 1431 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1432 1433 /* forward solve the U^T */ 1434 for (i=0; i<n; i++) { 1435 v = aa + adiag[i+1] + 1; 1436 vi = aj + adiag[i+1] + 1; 1437 nz = adiag[i] - adiag[i+1] - 1; 1438 s1 = tmp[i]; 1439 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1440 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1441 tmp[i] = s1; 1442 } 1443 1444 /* backward solve the L^T */ 1445 for (i=n-1; i>=0; i--) { 1446 v = aa + ai[i]; 1447 vi = aj + ai[i]; 1448 nz = ai[i+1] - ai[i]; 1449 s1 = tmp[i]; 1450 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1451 } 1452 1453 /* copy tmp into x according to permutation */ 1454 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1455 1456 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1457 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1458 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1459 ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr); 1460 1461 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1462 PetscFunctionReturn(0); 1463 } 1464 1465 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx) 1466 { 1467 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1468 IS iscol = a->col,isrow = a->row; 1469 PetscErrorCode ierr; 1470 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1471 PetscInt i,n = A->rmap->n,j; 1472 PetscInt nz; 1473 PetscScalar *x,*tmp,s1; 1474 const MatScalar *aa = a->a,*v; 1475 const PetscScalar *b; 1476 1477 PetscFunctionBegin; 1478 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1479 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1480 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1481 tmp = a->solve_work; 1482 1483 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1484 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1485 1486 /* copy the b into temp work space according to permutation */ 1487 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1488 1489 /* forward solve the U^T */ 1490 for (i=0; i<n; i++) { 1491 v = aa + diag[i]; 1492 vi = aj + diag[i] + 1; 1493 nz = ai[i+1] - diag[i] - 1; 1494 s1 = tmp[i]; 1495 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1496 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1497 tmp[i] = s1; 1498 } 1499 1500 /* backward solve the L^T */ 1501 for (i=n-1; i>=0; i--) { 1502 v = aa + diag[i] - 1; 1503 vi = aj + diag[i] - 1; 1504 nz = diag[i] - ai[i]; 1505 s1 = tmp[i]; 1506 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1507 } 1508 1509 /* copy tmp into x according to permutation */ 1510 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1511 1512 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1513 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1514 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1515 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1516 1517 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1518 PetscFunctionReturn(0); 1519 } 1520 1521 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1522 { 1523 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1524 IS iscol = a->col,isrow = a->row; 1525 PetscErrorCode ierr; 1526 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1527 PetscInt i,n = A->rmap->n,j; 1528 PetscInt nz; 1529 PetscScalar *x,*tmp,s1; 1530 const MatScalar *aa = a->a,*v; 1531 const PetscScalar *b; 1532 1533 PetscFunctionBegin; 1534 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1535 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1536 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1537 tmp = a->solve_work; 1538 1539 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1540 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1541 1542 /* copy the b into temp work space according to permutation */ 1543 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1544 1545 /* forward solve the U^T */ 1546 for (i=0; i<n; i++) { 1547 v = aa + adiag[i+1] + 1; 1548 vi = aj + adiag[i+1] + 1; 1549 nz = adiag[i] - adiag[i+1] - 1; 1550 s1 = tmp[i]; 1551 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1552 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1553 tmp[i] = s1; 1554 } 1555 1556 1557 /* backward solve the L^T */ 1558 for (i=n-1; i>=0; i--) { 1559 v = aa + ai[i]; 1560 vi = aj + ai[i]; 1561 nz = ai[i+1] - ai[i]; 1562 s1 = tmp[i]; 1563 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1564 } 1565 1566 /* copy tmp into x according to permutation */ 1567 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1568 1569 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1570 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1571 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1572 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1573 1574 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1575 PetscFunctionReturn(0); 1576 } 1577 1578 /* ----------------------------------------------------------------*/ 1579 1580 /* 1581 ilu() under revised new data structure. 1582 Factored arrays bj and ba are stored as 1583 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1584 1585 bi=fact->i is an array of size n+1, in which 1586 bi+ 1587 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1588 bi[n]: points to L(n-1,n-1)+1 1589 1590 bdiag=fact->diag is an array of size n+1,in which 1591 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1592 bdiag[n]: points to entry of U(n-1,0)-1 1593 1594 U(i,:) contains bdiag[i] as its last entry, i.e., 1595 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1596 */ 1597 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1598 { 1599 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1600 PetscErrorCode ierr; 1601 const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1602 PetscInt i,j,k=0,nz,*bi,*bj,*bdiag; 1603 IS isicol; 1604 1605 PetscFunctionBegin; 1606 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1607 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 1608 b = (Mat_SeqAIJ*)(fact)->data; 1609 1610 /* allocate matrix arrays for new data structure */ 1611 ierr = PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);CHKERRQ(ierr); 1612 ierr = PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1613 1614 b->singlemalloc = PETSC_TRUE; 1615 if (!b->diag) { 1616 ierr = PetscMalloc1(n+1,&b->diag);CHKERRQ(ierr); 1617 ierr = PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1618 } 1619 bdiag = b->diag; 1620 1621 if (n > 0) { 1622 ierr = PetscArrayzero(b->a,ai[n]);CHKERRQ(ierr); 1623 } 1624 1625 /* set bi and bj with new data structure */ 1626 bi = b->i; 1627 bj = b->j; 1628 1629 /* L part */ 1630 bi[0] = 0; 1631 for (i=0; i<n; i++) { 1632 nz = adiag[i] - ai[i]; 1633 bi[i+1] = bi[i] + nz; 1634 aj = a->j + ai[i]; 1635 for (j=0; j<nz; j++) { 1636 /* *bj = aj[j]; bj++; */ 1637 bj[k++] = aj[j]; 1638 } 1639 } 1640 1641 /* U part */ 1642 bdiag[n] = bi[n]-1; 1643 for (i=n-1; i>=0; i--) { 1644 nz = ai[i+1] - adiag[i] - 1; 1645 aj = a->j + adiag[i] + 1; 1646 for (j=0; j<nz; j++) { 1647 /* *bj = aj[j]; bj++; */ 1648 bj[k++] = aj[j]; 1649 } 1650 /* diag[i] */ 1651 /* *bj = i; bj++; */ 1652 bj[k++] = i; 1653 bdiag[i] = bdiag[i+1] + nz + 1; 1654 } 1655 1656 fact->factortype = MAT_FACTOR_ILU; 1657 fact->info.factor_mallocs = 0; 1658 fact->info.fill_ratio_given = info->fill; 1659 fact->info.fill_ratio_needed = 1.0; 1660 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1661 ierr = MatSeqAIJCheckInode_FactorLU(fact);CHKERRQ(ierr); 1662 1663 b = (Mat_SeqAIJ*)(fact)->data; 1664 b->row = isrow; 1665 b->col = iscol; 1666 b->icol = isicol; 1667 ierr = PetscMalloc1(fact->rmap->n+1,&b->solve_work);CHKERRQ(ierr); 1668 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1669 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1670 PetscFunctionReturn(0); 1671 } 1672 1673 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1674 { 1675 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1676 IS isicol; 1677 PetscErrorCode ierr; 1678 const PetscInt *r,*ic; 1679 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1680 PetscInt *bi,*cols,nnz,*cols_lvl; 1681 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1682 PetscInt i,levels,diagonal_fill; 1683 PetscBool col_identity,row_identity,missing; 1684 PetscReal f; 1685 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 1686 PetscBT lnkbt; 1687 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1688 PetscFreeSpaceList free_space =NULL,current_space=NULL; 1689 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 1690 1691 PetscFunctionBegin; 1692 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1693 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1694 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1695 1696 levels = (PetscInt)info->levels; 1697 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1698 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1699 if (!levels && row_identity && col_identity) { 1700 /* special case: ilu(0) with natural ordering */ 1701 ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1702 if (a->inode.size) { 1703 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1704 } 1705 PetscFunctionReturn(0); 1706 } 1707 1708 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1709 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1710 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1711 1712 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1713 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 1714 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 1715 bi[0] = bdiag[0] = 0; 1716 ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr); 1717 1718 /* create a linked list for storing column indices of the active row */ 1719 nlnk = n + 1; 1720 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1721 1722 /* initial FreeSpace size is f*(ai[n]+1) */ 1723 f = info->fill; 1724 diagonal_fill = (PetscInt)info->diagonal_fill; 1725 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); 1726 current_space = free_space; 1727 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);CHKERRQ(ierr); 1728 current_space_lvl = free_space_lvl; 1729 for (i=0; i<n; i++) { 1730 nzi = 0; 1731 /* copy current row into linked list */ 1732 nnz = ai[r[i]+1] - ai[r[i]]; 1733 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1734 cols = aj + ai[r[i]]; 1735 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1736 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1737 nzi += nlnk; 1738 1739 /* make sure diagonal entry is included */ 1740 if (diagonal_fill && lnk[i] == -1) { 1741 fm = n; 1742 while (lnk[fm] < i) fm = lnk[fm]; 1743 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1744 lnk[fm] = i; 1745 lnk_lvl[i] = 0; 1746 nzi++; dcount++; 1747 } 1748 1749 /* add pivot rows into the active row */ 1750 nzbd = 0; 1751 prow = lnk[n]; 1752 while (prow < i) { 1753 nnz = bdiag[prow]; 1754 cols = bj_ptr[prow] + nnz + 1; 1755 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1756 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1757 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1758 nzi += nlnk; 1759 prow = lnk[prow]; 1760 nzbd++; 1761 } 1762 bdiag[i] = nzbd; 1763 bi[i+1] = bi[i] + nzi; 1764 /* if free space is not available, make more free space */ 1765 if (current_space->local_remaining<nzi) { 1766 nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(nzi,n - i)); /* estimated and max additional space needed */ 1767 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1768 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1769 reallocs++; 1770 } 1771 1772 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1773 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1774 bj_ptr[i] = current_space->array; 1775 bjlvl_ptr[i] = current_space_lvl->array; 1776 1777 /* make sure the active row i has diagonal entry */ 1778 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1779 1780 current_space->array += nzi; 1781 current_space->local_used += nzi; 1782 current_space->local_remaining -= nzi; 1783 current_space_lvl->array += nzi; 1784 current_space_lvl->local_used += nzi; 1785 current_space_lvl->local_remaining -= nzi; 1786 } 1787 1788 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1789 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1790 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1791 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 1792 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 1793 1794 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1795 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1796 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 1797 1798 #if defined(PETSC_USE_INFO) 1799 { 1800 PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1801 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 1802 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 1803 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr); 1804 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1805 if (diagonal_fill) { 1806 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr); 1807 } 1808 } 1809 #endif 1810 /* put together the new matrix */ 1811 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 1812 ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr); 1813 b = (Mat_SeqAIJ*)(fact)->data; 1814 1815 b->free_a = PETSC_TRUE; 1816 b->free_ij = PETSC_TRUE; 1817 b->singlemalloc = PETSC_FALSE; 1818 1819 ierr = PetscMalloc1(bdiag[0]+1,&b->a);CHKERRQ(ierr); 1820 1821 b->j = bj; 1822 b->i = bi; 1823 b->diag = bdiag; 1824 b->ilen = 0; 1825 b->imax = 0; 1826 b->row = isrow; 1827 b->col = iscol; 1828 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1829 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1830 b->icol = isicol; 1831 1832 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 1833 /* In b structure: Free imax, ilen, old a, old j. 1834 Allocate bdiag, solve_work, new a, new j */ 1835 ierr = PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1836 b->maxnz = b->nz = bdiag[0]+1; 1837 1838 (fact)->info.factor_mallocs = reallocs; 1839 (fact)->info.fill_ratio_given = f; 1840 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1841 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1842 if (a->inode.size) { 1843 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1844 } 1845 ierr = MatSeqAIJCheckInode_FactorLU(fact);CHKERRQ(ierr); 1846 PetscFunctionReturn(0); 1847 } 1848 1849 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1850 { 1851 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1852 IS isicol; 1853 PetscErrorCode ierr; 1854 const PetscInt *r,*ic; 1855 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1856 PetscInt *bi,*cols,nnz,*cols_lvl; 1857 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1858 PetscInt i,levels,diagonal_fill; 1859 PetscBool col_identity,row_identity; 1860 PetscReal f; 1861 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 1862 PetscBT lnkbt; 1863 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1864 PetscFreeSpaceList free_space =NULL,current_space=NULL; 1865 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 1866 PetscBool missing; 1867 1868 PetscFunctionBegin; 1869 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1870 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1871 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1872 1873 f = info->fill; 1874 levels = (PetscInt)info->levels; 1875 diagonal_fill = (PetscInt)info->diagonal_fill; 1876 1877 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1878 1879 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1880 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1881 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1882 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 1883 1884 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1885 if (a->inode.size) { 1886 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1887 } 1888 fact->factortype = MAT_FACTOR_ILU; 1889 (fact)->info.factor_mallocs = 0; 1890 (fact)->info.fill_ratio_given = info->fill; 1891 (fact)->info.fill_ratio_needed = 1.0; 1892 1893 b = (Mat_SeqAIJ*)(fact)->data; 1894 b->row = isrow; 1895 b->col = iscol; 1896 b->icol = isicol; 1897 ierr = PetscMalloc1((fact)->rmap->n+1,&b->solve_work);CHKERRQ(ierr); 1898 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1899 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1900 PetscFunctionReturn(0); 1901 } 1902 1903 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1904 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1905 1906 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1907 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 1908 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 1909 bi[0] = bdiag[0] = 0; 1910 1911 ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr); 1912 1913 /* create a linked list for storing column indices of the active row */ 1914 nlnk = n + 1; 1915 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1916 1917 /* initial FreeSpace size is f*(ai[n]+1) */ 1918 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); 1919 current_space = free_space; 1920 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);CHKERRQ(ierr); 1921 current_space_lvl = free_space_lvl; 1922 1923 for (i=0; i<n; i++) { 1924 nzi = 0; 1925 /* copy current row into linked list */ 1926 nnz = ai[r[i]+1] - ai[r[i]]; 1927 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1928 cols = aj + ai[r[i]]; 1929 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1930 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1931 nzi += nlnk; 1932 1933 /* make sure diagonal entry is included */ 1934 if (diagonal_fill && lnk[i] == -1) { 1935 fm = n; 1936 while (lnk[fm] < i) fm = lnk[fm]; 1937 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1938 lnk[fm] = i; 1939 lnk_lvl[i] = 0; 1940 nzi++; dcount++; 1941 } 1942 1943 /* add pivot rows into the active row */ 1944 nzbd = 0; 1945 prow = lnk[n]; 1946 while (prow < i) { 1947 nnz = bdiag[prow]; 1948 cols = bj_ptr[prow] + nnz + 1; 1949 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1950 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1951 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1952 nzi += nlnk; 1953 prow = lnk[prow]; 1954 nzbd++; 1955 } 1956 bdiag[i] = nzbd; 1957 bi[i+1] = bi[i] + nzi; 1958 1959 /* if free space is not available, make more free space */ 1960 if (current_space->local_remaining<nzi) { 1961 nnz = PetscIntMultTruncate(nzi,n - i); /* estimated and max additional space needed */ 1962 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1963 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1964 reallocs++; 1965 } 1966 1967 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1968 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1969 bj_ptr[i] = current_space->array; 1970 bjlvl_ptr[i] = current_space_lvl->array; 1971 1972 /* make sure the active row i has diagonal entry */ 1973 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1974 1975 current_space->array += nzi; 1976 current_space->local_used += nzi; 1977 current_space->local_remaining -= nzi; 1978 current_space_lvl->array += nzi; 1979 current_space_lvl->local_used += nzi; 1980 current_space_lvl->local_remaining -= nzi; 1981 } 1982 1983 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1984 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1985 1986 /* destroy list of free space and other temporary arrays */ 1987 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 1988 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */ 1989 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1990 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1991 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 1992 1993 #if defined(PETSC_USE_INFO) 1994 { 1995 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1996 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 1997 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 1998 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr); 1999 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2000 if (diagonal_fill) { 2001 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr); 2002 } 2003 } 2004 #endif 2005 2006 /* put together the new matrix */ 2007 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 2008 ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr); 2009 b = (Mat_SeqAIJ*)(fact)->data; 2010 2011 b->free_a = PETSC_TRUE; 2012 b->free_ij = PETSC_TRUE; 2013 b->singlemalloc = PETSC_FALSE; 2014 2015 ierr = PetscMalloc1(bi[n],&b->a);CHKERRQ(ierr); 2016 b->j = bj; 2017 b->i = bi; 2018 for (i=0; i<n; i++) bdiag[i] += bi[i]; 2019 b->diag = bdiag; 2020 b->ilen = 0; 2021 b->imax = 0; 2022 b->row = isrow; 2023 b->col = iscol; 2024 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2025 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2026 b->icol = isicol; 2027 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 2028 /* In b structure: Free imax, ilen, old a, old j. 2029 Allocate bdiag, solve_work, new a, new j */ 2030 ierr = PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 2031 b->maxnz = b->nz = bi[n]; 2032 2033 (fact)->info.factor_mallocs = reallocs; 2034 (fact)->info.fill_ratio_given = f; 2035 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2036 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2037 if (a->inode.size) { 2038 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2039 } 2040 PetscFunctionReturn(0); 2041 } 2042 2043 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 2044 { 2045 Mat C = B; 2046 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2047 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2048 IS ip=b->row,iip = b->icol; 2049 PetscErrorCode ierr; 2050 const PetscInt *rip,*riip; 2051 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 2052 PetscInt *ai=a->i,*aj=a->j; 2053 PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 2054 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2055 PetscBool perm_identity; 2056 FactorShiftCtx sctx; 2057 PetscReal rs; 2058 MatScalar d,*v; 2059 2060 PetscFunctionBegin; 2061 /* MatPivotSetUp(): initialize shift context sctx */ 2062 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2063 2064 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2065 sctx.shift_top = info->zeropivot; 2066 for (i=0; i<mbs; i++) { 2067 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2068 d = (aa)[a->diag[i]]; 2069 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2070 v = aa+ai[i]; 2071 nz = ai[i+1] - ai[i]; 2072 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 2073 if (rs>sctx.shift_top) sctx.shift_top = rs; 2074 } 2075 sctx.shift_top *= 1.1; 2076 sctx.nshift_max = 5; 2077 sctx.shift_lo = 0.; 2078 sctx.shift_hi = 1.; 2079 } 2080 2081 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2082 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2083 2084 /* allocate working arrays 2085 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2086 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2087 */ 2088 ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);CHKERRQ(ierr); 2089 2090 do { 2091 sctx.newshift = PETSC_FALSE; 2092 2093 for (i=0; i<mbs; i++) c2r[i] = mbs; 2094 if (mbs) il[0] = 0; 2095 2096 for (k = 0; k<mbs; k++) { 2097 /* zero rtmp */ 2098 nz = bi[k+1] - bi[k]; 2099 bjtmp = bj + bi[k]; 2100 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2101 2102 /* load in initial unfactored row */ 2103 bval = ba + bi[k]; 2104 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2105 for (j = jmin; j < jmax; j++) { 2106 col = riip[aj[j]]; 2107 if (col >= k) { /* only take upper triangular entry */ 2108 rtmp[col] = aa[j]; 2109 *bval++ = 0.0; /* for in-place factorization */ 2110 } 2111 } 2112 /* shift the diagonal of the matrix: ZeropivotApply() */ 2113 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2114 2115 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2116 dk = rtmp[k]; 2117 i = c2r[k]; /* first row to be added to k_th row */ 2118 2119 while (i < k) { 2120 nexti = c2r[i]; /* next row to be added to k_th row */ 2121 2122 /* compute multiplier, update diag(k) and U(i,k) */ 2123 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2124 uikdi = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 2125 dk += uikdi*ba[ili]; /* update diag[k] */ 2126 ba[ili] = uikdi; /* -U(i,k) */ 2127 2128 /* add multiple of row i to k-th row */ 2129 jmin = ili + 1; jmax = bi[i+1]; 2130 if (jmin < jmax) { 2131 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2132 /* update il and c2r for row i */ 2133 il[i] = jmin; 2134 j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 2135 } 2136 i = nexti; 2137 } 2138 2139 /* copy data into U(k,:) */ 2140 rs = 0.0; 2141 jmin = bi[k]; jmax = bi[k+1]-1; 2142 if (jmin < jmax) { 2143 for (j=jmin; j<jmax; j++) { 2144 col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]); 2145 } 2146 /* add the k-th row into il and c2r */ 2147 il[k] = jmin; 2148 i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 2149 } 2150 2151 /* MatPivotCheck() */ 2152 sctx.rs = rs; 2153 sctx.pv = dk; 2154 ierr = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr); 2155 if (sctx.newshift) break; 2156 dk = sctx.pv; 2157 2158 ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 2159 } 2160 } while (sctx.newshift); 2161 2162 ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr); 2163 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2164 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2165 2166 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2167 if (perm_identity) { 2168 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2169 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2170 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2171 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2172 } else { 2173 B->ops->solve = MatSolve_SeqSBAIJ_1; 2174 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2175 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2176 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2177 } 2178 2179 C->assembled = PETSC_TRUE; 2180 C->preallocated = PETSC_TRUE; 2181 2182 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2183 2184 /* MatPivotView() */ 2185 if (sctx.nshift) { 2186 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2187 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 2188 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2189 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2190 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 2191 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr); 2192 } 2193 } 2194 PetscFunctionReturn(0); 2195 } 2196 2197 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 2198 { 2199 Mat C = B; 2200 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2201 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2202 IS ip=b->row,iip = b->icol; 2203 PetscErrorCode ierr; 2204 const PetscInt *rip,*riip; 2205 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp; 2206 PetscInt *ai=a->i,*aj=a->j; 2207 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 2208 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2209 PetscBool perm_identity; 2210 FactorShiftCtx sctx; 2211 PetscReal rs; 2212 MatScalar d,*v; 2213 2214 PetscFunctionBegin; 2215 /* MatPivotSetUp(): initialize shift context sctx */ 2216 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2217 2218 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2219 sctx.shift_top = info->zeropivot; 2220 for (i=0; i<mbs; i++) { 2221 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2222 d = (aa)[a->diag[i]]; 2223 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2224 v = aa+ai[i]; 2225 nz = ai[i+1] - ai[i]; 2226 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 2227 if (rs>sctx.shift_top) sctx.shift_top = rs; 2228 } 2229 sctx.shift_top *= 1.1; 2230 sctx.nshift_max = 5; 2231 sctx.shift_lo = 0.; 2232 sctx.shift_hi = 1.; 2233 } 2234 2235 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2236 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2237 2238 /* initialization */ 2239 ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);CHKERRQ(ierr); 2240 2241 do { 2242 sctx.newshift = PETSC_FALSE; 2243 2244 for (i=0; i<mbs; i++) jl[i] = mbs; 2245 il[0] = 0; 2246 2247 for (k = 0; k<mbs; k++) { 2248 /* zero rtmp */ 2249 nz = bi[k+1] - bi[k]; 2250 bjtmp = bj + bi[k]; 2251 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2252 2253 bval = ba + bi[k]; 2254 /* initialize k-th row by the perm[k]-th row of A */ 2255 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2256 for (j = jmin; j < jmax; j++) { 2257 col = riip[aj[j]]; 2258 if (col >= k) { /* only take upper triangular entry */ 2259 rtmp[col] = aa[j]; 2260 *bval++ = 0.0; /* for in-place factorization */ 2261 } 2262 } 2263 /* shift the diagonal of the matrix */ 2264 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2265 2266 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2267 dk = rtmp[k]; 2268 i = jl[k]; /* first row to be added to k_th row */ 2269 2270 while (i < k) { 2271 nexti = jl[i]; /* next row to be added to k_th row */ 2272 2273 /* compute multiplier, update diag(k) and U(i,k) */ 2274 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2275 uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */ 2276 dk += uikdi*ba[ili]; 2277 ba[ili] = uikdi; /* -U(i,k) */ 2278 2279 /* add multiple of row i to k-th row */ 2280 jmin = ili + 1; jmax = bi[i+1]; 2281 if (jmin < jmax) { 2282 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2283 /* update il and jl for row i */ 2284 il[i] = jmin; 2285 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 2286 } 2287 i = nexti; 2288 } 2289 2290 /* shift the diagonals when zero pivot is detected */ 2291 /* compute rs=sum of abs(off-diagonal) */ 2292 rs = 0.0; 2293 jmin = bi[k]+1; 2294 nz = bi[k+1] - jmin; 2295 bcol = bj + jmin; 2296 for (j=0; j<nz; j++) { 2297 rs += PetscAbsScalar(rtmp[bcol[j]]); 2298 } 2299 2300 sctx.rs = rs; 2301 sctx.pv = dk; 2302 ierr = MatPivotCheck(B,A,info,&sctx,k);CHKERRQ(ierr); 2303 if (sctx.newshift) break; 2304 dk = sctx.pv; 2305 2306 /* copy data into U(k,:) */ 2307 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 2308 jmin = bi[k]+1; jmax = bi[k+1]; 2309 if (jmin < jmax) { 2310 for (j=jmin; j<jmax; j++) { 2311 col = bj[j]; ba[j] = rtmp[col]; 2312 } 2313 /* add the k-th row into il and jl */ 2314 il[k] = jmin; 2315 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 2316 } 2317 } 2318 } while (sctx.newshift); 2319 2320 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 2321 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2322 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2323 2324 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2325 if (perm_identity) { 2326 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2327 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2328 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2329 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2330 } else { 2331 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2332 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2333 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2334 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2335 } 2336 2337 C->assembled = PETSC_TRUE; 2338 C->preallocated = PETSC_TRUE; 2339 2340 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2341 if (sctx.nshift) { 2342 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2343 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2344 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2345 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2346 } 2347 } 2348 PetscFunctionReturn(0); 2349 } 2350 2351 /* 2352 icc() under revised new data structure. 2353 Factored arrays bj and ba are stored as 2354 U(0,:),...,U(i,:),U(n-1,:) 2355 2356 ui=fact->i is an array of size n+1, in which 2357 ui+ 2358 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2359 ui[n]: points to U(n-1,n-1)+1 2360 2361 udiag=fact->diag is an array of size n,in which 2362 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2363 2364 U(i,:) contains udiag[i] as its last entry, i.e., 2365 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2366 */ 2367 2368 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2369 { 2370 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2371 Mat_SeqSBAIJ *b; 2372 PetscErrorCode ierr; 2373 PetscBool perm_identity,missing; 2374 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2375 const PetscInt *rip,*riip; 2376 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2377 PetscInt nlnk,*lnk,*lnk_lvl=NULL,d; 2378 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2379 PetscReal fill =info->fill,levels=info->levels; 2380 PetscFreeSpaceList free_space =NULL,current_space=NULL; 2381 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 2382 PetscBT lnkbt; 2383 IS iperm; 2384 2385 PetscFunctionBegin; 2386 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2387 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2388 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2389 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2390 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2391 2392 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 2393 ierr = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr); 2394 ui[0] = 0; 2395 2396 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2397 if (!levels && perm_identity) { 2398 for (i=0; i<am; i++) { 2399 ncols = ai[i+1] - a->diag[i]; 2400 ui[i+1] = ui[i] + ncols; 2401 udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 2402 } 2403 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2404 cols = uj; 2405 for (i=0; i<am; i++) { 2406 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2407 ncols = ai[i+1] - a->diag[i] -1; 2408 for (j=0; j<ncols; j++) *cols++ = aj[j]; 2409 *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */ 2410 } 2411 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2412 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2413 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2414 2415 /* initialization */ 2416 ierr = PetscMalloc1(am+1,&ajtmp);CHKERRQ(ierr); 2417 2418 /* jl: linked list for storing indices of the pivot rows 2419 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2420 ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr); 2421 for (i=0; i<am; i++) { 2422 jl[i] = am; il[i] = 0; 2423 } 2424 2425 /* create and initialize a linked list for storing column indices of the active row k */ 2426 nlnk = am + 1; 2427 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2428 2429 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2430 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2431 current_space = free_space; 2432 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr); 2433 current_space_lvl = free_space_lvl; 2434 2435 for (k=0; k<am; k++) { /* for each active row k */ 2436 /* initialize lnk by the column indices of row rip[k] of A */ 2437 nzk = 0; 2438 ncols = ai[rip[k]+1] - ai[rip[k]]; 2439 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2440 ncols_upper = 0; 2441 for (j=0; j<ncols; j++) { 2442 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2443 if (riip[i] >= k) { /* only take upper triangular entry */ 2444 ajtmp[ncols_upper] = i; 2445 ncols_upper++; 2446 } 2447 } 2448 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2449 nzk += nlnk; 2450 2451 /* update lnk by computing fill-in for each pivot row to be merged in */ 2452 prow = jl[k]; /* 1st pivot row */ 2453 2454 while (prow < k) { 2455 nextprow = jl[prow]; 2456 2457 /* merge prow into k-th row */ 2458 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2459 jmax = ui[prow+1]; 2460 ncols = jmax-jmin; 2461 i = jmin - ui[prow]; 2462 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2463 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2464 j = *(uj - 1); 2465 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2466 nzk += nlnk; 2467 2468 /* update il and jl for prow */ 2469 if (jmin < jmax) { 2470 il[prow] = jmin; 2471 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2472 } 2473 prow = nextprow; 2474 } 2475 2476 /* if free space is not available, make more free space */ 2477 if (current_space->local_remaining<nzk) { 2478 i = am - k + 1; /* num of unfactored rows */ 2479 i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2480 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2481 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2482 reallocs++; 2483 } 2484 2485 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2486 if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2487 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2488 2489 /* add the k-th row into il and jl */ 2490 if (nzk > 1) { 2491 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2492 jl[k] = jl[i]; jl[i] = k; 2493 il[k] = ui[k] + 1; 2494 } 2495 uj_ptr[k] = current_space->array; 2496 uj_lvl_ptr[k] = current_space_lvl->array; 2497 2498 current_space->array += nzk; 2499 current_space->local_used += nzk; 2500 current_space->local_remaining -= nzk; 2501 2502 current_space_lvl->array += nzk; 2503 current_space_lvl->local_used += nzk; 2504 current_space_lvl->local_remaining -= nzk; 2505 2506 ui[k+1] = ui[k] + nzk; 2507 } 2508 2509 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2510 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2511 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2512 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2513 2514 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2515 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2516 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2517 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2518 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2519 2520 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2521 2522 /* put together the new matrix in MATSEQSBAIJ format */ 2523 b = (Mat_SeqSBAIJ*)(fact)->data; 2524 b->singlemalloc = PETSC_FALSE; 2525 2526 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 2527 2528 b->j = uj; 2529 b->i = ui; 2530 b->diag = udiag; 2531 b->free_diag = PETSC_TRUE; 2532 b->ilen = 0; 2533 b->imax = 0; 2534 b->row = perm; 2535 b->col = perm; 2536 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2537 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2538 b->icol = iperm; 2539 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2540 2541 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 2542 ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2543 2544 b->maxnz = b->nz = ui[am]; 2545 b->free_a = PETSC_TRUE; 2546 b->free_ij = PETSC_TRUE; 2547 2548 fact->info.factor_mallocs = reallocs; 2549 fact->info.fill_ratio_given = fill; 2550 if (ai[am] != 0) { 2551 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2552 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2553 } else { 2554 fact->info.fill_ratio_needed = 0.0; 2555 } 2556 #if defined(PETSC_USE_INFO) 2557 if (ai[am] != 0) { 2558 PetscReal af = fact->info.fill_ratio_needed; 2559 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2560 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2561 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2562 } else { 2563 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 2564 } 2565 #endif 2566 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2567 PetscFunctionReturn(0); 2568 } 2569 2570 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2571 { 2572 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2573 Mat_SeqSBAIJ *b; 2574 PetscErrorCode ierr; 2575 PetscBool perm_identity,missing; 2576 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2577 const PetscInt *rip,*riip; 2578 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2579 PetscInt nlnk,*lnk,*lnk_lvl=NULL,d; 2580 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2581 PetscReal fill =info->fill,levels=info->levels; 2582 PetscFreeSpaceList free_space =NULL,current_space=NULL; 2583 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 2584 PetscBT lnkbt; 2585 IS iperm; 2586 2587 PetscFunctionBegin; 2588 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2589 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2590 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2591 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2592 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2593 2594 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 2595 ierr = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr); 2596 ui[0] = 0; 2597 2598 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2599 if (!levels && perm_identity) { 2600 2601 for (i=0; i<am; i++) { 2602 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 2603 udiag[i] = ui[i]; 2604 } 2605 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2606 cols = uj; 2607 for (i=0; i<am; i++) { 2608 aj = a->j + a->diag[i]; 2609 ncols = ui[i+1] - ui[i]; 2610 for (j=0; j<ncols; j++) *cols++ = *aj++; 2611 } 2612 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2613 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2614 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2615 2616 /* initialization */ 2617 ierr = PetscMalloc1(am+1,&ajtmp);CHKERRQ(ierr); 2618 2619 /* jl: linked list for storing indices of the pivot rows 2620 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2621 ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr); 2622 for (i=0; i<am; i++) { 2623 jl[i] = am; il[i] = 0; 2624 } 2625 2626 /* create and initialize a linked list for storing column indices of the active row k */ 2627 nlnk = am + 1; 2628 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2629 2630 /* initial FreeSpace size is fill*(ai[am]+1) */ 2631 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);CHKERRQ(ierr); 2632 current_space = free_space; 2633 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space_lvl);CHKERRQ(ierr); 2634 current_space_lvl = free_space_lvl; 2635 2636 for (k=0; k<am; k++) { /* for each active row k */ 2637 /* initialize lnk by the column indices of row rip[k] of A */ 2638 nzk = 0; 2639 ncols = ai[rip[k]+1] - ai[rip[k]]; 2640 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2641 ncols_upper = 0; 2642 for (j=0; j<ncols; j++) { 2643 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2644 if (riip[i] >= k) { /* only take upper triangular entry */ 2645 ajtmp[ncols_upper] = i; 2646 ncols_upper++; 2647 } 2648 } 2649 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2650 nzk += nlnk; 2651 2652 /* update lnk by computing fill-in for each pivot row to be merged in */ 2653 prow = jl[k]; /* 1st pivot row */ 2654 2655 while (prow < k) { 2656 nextprow = jl[prow]; 2657 2658 /* merge prow into k-th row */ 2659 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2660 jmax = ui[prow+1]; 2661 ncols = jmax-jmin; 2662 i = jmin - ui[prow]; 2663 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2664 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2665 j = *(uj - 1); 2666 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2667 nzk += nlnk; 2668 2669 /* update il and jl for prow */ 2670 if (jmin < jmax) { 2671 il[prow] = jmin; 2672 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2673 } 2674 prow = nextprow; 2675 } 2676 2677 /* if free space is not available, make more free space */ 2678 if (current_space->local_remaining<nzk) { 2679 i = am - k + 1; /* num of unfactored rows */ 2680 i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2681 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2682 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2683 reallocs++; 2684 } 2685 2686 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2687 if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2688 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2689 2690 /* add the k-th row into il and jl */ 2691 if (nzk > 1) { 2692 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2693 jl[k] = jl[i]; jl[i] = k; 2694 il[k] = ui[k] + 1; 2695 } 2696 uj_ptr[k] = current_space->array; 2697 uj_lvl_ptr[k] = current_space_lvl->array; 2698 2699 current_space->array += nzk; 2700 current_space->local_used += nzk; 2701 current_space->local_remaining -= nzk; 2702 2703 current_space_lvl->array += nzk; 2704 current_space_lvl->local_used += nzk; 2705 current_space_lvl->local_remaining -= nzk; 2706 2707 ui[k+1] = ui[k] + nzk; 2708 } 2709 2710 #if defined(PETSC_USE_INFO) 2711 if (ai[am] != 0) { 2712 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2713 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2714 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2715 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2716 } else { 2717 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 2718 } 2719 #endif 2720 2721 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2722 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2723 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2724 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2725 2726 /* destroy list of free space and other temporary array(s) */ 2727 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2728 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2729 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2730 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2731 2732 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2733 2734 /* put together the new matrix in MATSEQSBAIJ format */ 2735 2736 b = (Mat_SeqSBAIJ*)fact->data; 2737 b->singlemalloc = PETSC_FALSE; 2738 2739 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 2740 2741 b->j = uj; 2742 b->i = ui; 2743 b->diag = udiag; 2744 b->free_diag = PETSC_TRUE; 2745 b->ilen = 0; 2746 b->imax = 0; 2747 b->row = perm; 2748 b->col = perm; 2749 2750 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2751 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2752 2753 b->icol = iperm; 2754 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2755 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 2756 ierr = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2757 b->maxnz = b->nz = ui[am]; 2758 b->free_a = PETSC_TRUE; 2759 b->free_ij = PETSC_TRUE; 2760 2761 fact->info.factor_mallocs = reallocs; 2762 fact->info.fill_ratio_given = fill; 2763 if (ai[am] != 0) { 2764 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2765 } else { 2766 fact->info.fill_ratio_needed = 0.0; 2767 } 2768 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2769 PetscFunctionReturn(0); 2770 } 2771 2772 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2773 { 2774 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2775 Mat_SeqSBAIJ *b; 2776 PetscErrorCode ierr; 2777 PetscBool perm_identity,missing; 2778 PetscReal fill = info->fill; 2779 const PetscInt *rip,*riip; 2780 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2781 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2782 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag; 2783 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2784 PetscBT lnkbt; 2785 IS iperm; 2786 2787 PetscFunctionBegin; 2788 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2789 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 2790 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 2791 2792 /* check whether perm is the identity mapping */ 2793 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2794 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2795 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2796 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2797 2798 /* initialization */ 2799 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 2800 ierr = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr); 2801 ui[0] = 0; 2802 2803 /* jl: linked list for storing indices of the pivot rows 2804 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2805 ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr); 2806 for (i=0; i<am; i++) { 2807 jl[i] = am; il[i] = 0; 2808 } 2809 2810 /* create and initialize a linked list for storing column indices of the active row k */ 2811 nlnk = am + 1; 2812 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2813 2814 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2815 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2816 current_space = free_space; 2817 2818 for (k=0; k<am; k++) { /* for each active row k */ 2819 /* initialize lnk by the column indices of row rip[k] of A */ 2820 nzk = 0; 2821 ncols = ai[rip[k]+1] - ai[rip[k]]; 2822 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2823 ncols_upper = 0; 2824 for (j=0; j<ncols; j++) { 2825 i = riip[*(aj + ai[rip[k]] + j)]; 2826 if (i >= k) { /* only take upper triangular entry */ 2827 cols[ncols_upper] = i; 2828 ncols_upper++; 2829 } 2830 } 2831 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2832 nzk += nlnk; 2833 2834 /* update lnk by computing fill-in for each pivot row to be merged in */ 2835 prow = jl[k]; /* 1st pivot row */ 2836 2837 while (prow < k) { 2838 nextprow = jl[prow]; 2839 /* merge prow into k-th row */ 2840 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2841 jmax = ui[prow+1]; 2842 ncols = jmax-jmin; 2843 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2844 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2845 nzk += nlnk; 2846 2847 /* update il and jl for prow */ 2848 if (jmin < jmax) { 2849 il[prow] = jmin; 2850 j = *uj_ptr; 2851 jl[prow] = jl[j]; 2852 jl[j] = prow; 2853 } 2854 prow = nextprow; 2855 } 2856 2857 /* if free space is not available, make more free space */ 2858 if (current_space->local_remaining<nzk) { 2859 i = am - k + 1; /* num of unfactored rows */ 2860 i = PetscIntMultTruncate(i,PetscMin(nzk,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2861 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2862 reallocs++; 2863 } 2864 2865 /* copy data into free space, then initialize lnk */ 2866 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 2867 2868 /* add the k-th row into il and jl */ 2869 if (nzk > 1) { 2870 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2871 jl[k] = jl[i]; jl[i] = k; 2872 il[k] = ui[k] + 1; 2873 } 2874 ui_ptr[k] = current_space->array; 2875 2876 current_space->array += nzk; 2877 current_space->local_used += nzk; 2878 current_space->local_remaining -= nzk; 2879 2880 ui[k+1] = ui[k] + nzk; 2881 } 2882 2883 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2884 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2885 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 2886 2887 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2888 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2889 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2890 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2891 2892 /* put together the new matrix in MATSEQSBAIJ format */ 2893 2894 b = (Mat_SeqSBAIJ*)fact->data; 2895 b->singlemalloc = PETSC_FALSE; 2896 b->free_a = PETSC_TRUE; 2897 b->free_ij = PETSC_TRUE; 2898 2899 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 2900 2901 b->j = uj; 2902 b->i = ui; 2903 b->diag = udiag; 2904 b->free_diag = PETSC_TRUE; 2905 b->ilen = 0; 2906 b->imax = 0; 2907 b->row = perm; 2908 b->col = perm; 2909 2910 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2911 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2912 2913 b->icol = iperm; 2914 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2915 2916 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 2917 ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2918 2919 b->maxnz = b->nz = ui[am]; 2920 2921 fact->info.factor_mallocs = reallocs; 2922 fact->info.fill_ratio_given = fill; 2923 if (ai[am] != 0) { 2924 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2925 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2926 } else { 2927 fact->info.fill_ratio_needed = 0.0; 2928 } 2929 #if defined(PETSC_USE_INFO) 2930 if (ai[am] != 0) { 2931 PetscReal af = fact->info.fill_ratio_needed; 2932 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2933 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2934 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2935 } else { 2936 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 2937 } 2938 #endif 2939 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2940 PetscFunctionReturn(0); 2941 } 2942 2943 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2944 { 2945 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2946 Mat_SeqSBAIJ *b; 2947 PetscErrorCode ierr; 2948 PetscBool perm_identity,missing; 2949 PetscReal fill = info->fill; 2950 const PetscInt *rip,*riip; 2951 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2952 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2953 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 2954 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2955 PetscBT lnkbt; 2956 IS iperm; 2957 2958 PetscFunctionBegin; 2959 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2960 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 2961 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 2962 2963 /* check whether perm is the identity mapping */ 2964 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2965 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2966 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2967 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2968 2969 /* initialization */ 2970 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 2971 ui[0] = 0; 2972 2973 /* jl: linked list for storing indices of the pivot rows 2974 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2975 ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr); 2976 for (i=0; i<am; i++) { 2977 jl[i] = am; il[i] = 0; 2978 } 2979 2980 /* create and initialize a linked list for storing column indices of the active row k */ 2981 nlnk = am + 1; 2982 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2983 2984 /* initial FreeSpace size is fill*(ai[am]+1) */ 2985 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);CHKERRQ(ierr); 2986 current_space = free_space; 2987 2988 for (k=0; k<am; k++) { /* for each active row k */ 2989 /* initialize lnk by the column indices of row rip[k] of A */ 2990 nzk = 0; 2991 ncols = ai[rip[k]+1] - ai[rip[k]]; 2992 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2993 ncols_upper = 0; 2994 for (j=0; j<ncols; j++) { 2995 i = riip[*(aj + ai[rip[k]] + j)]; 2996 if (i >= k) { /* only take upper triangular entry */ 2997 cols[ncols_upper] = i; 2998 ncols_upper++; 2999 } 3000 } 3001 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3002 nzk += nlnk; 3003 3004 /* update lnk by computing fill-in for each pivot row to be merged in */ 3005 prow = jl[k]; /* 1st pivot row */ 3006 3007 while (prow < k) { 3008 nextprow = jl[prow]; 3009 /* merge prow into k-th row */ 3010 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3011 jmax = ui[prow+1]; 3012 ncols = jmax-jmin; 3013 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3014 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3015 nzk += nlnk; 3016 3017 /* update il and jl for prow */ 3018 if (jmin < jmax) { 3019 il[prow] = jmin; 3020 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 3021 } 3022 prow = nextprow; 3023 } 3024 3025 /* if free space is not available, make more free space */ 3026 if (current_space->local_remaining<nzk) { 3027 i = am - k + 1; /* num of unfactored rows */ 3028 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3029 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 3030 reallocs++; 3031 } 3032 3033 /* copy data into free space, then initialize lnk */ 3034 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3035 3036 /* add the k-th row into il and jl */ 3037 if (nzk-1 > 0) { 3038 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3039 jl[k] = jl[i]; jl[i] = k; 3040 il[k] = ui[k] + 1; 3041 } 3042 ui_ptr[k] = current_space->array; 3043 3044 current_space->array += nzk; 3045 current_space->local_used += nzk; 3046 current_space->local_remaining -= nzk; 3047 3048 ui[k+1] = ui[k] + nzk; 3049 } 3050 3051 #if defined(PETSC_USE_INFO) 3052 if (ai[am] != 0) { 3053 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 3054 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 3055 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 3056 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 3057 } else { 3058 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 3059 } 3060 #endif 3061 3062 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 3063 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 3064 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 3065 3066 /* destroy list of free space and other temporary array(s) */ 3067 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 3068 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 3069 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3070 3071 /* put together the new matrix in MATSEQSBAIJ format */ 3072 3073 b = (Mat_SeqSBAIJ*)fact->data; 3074 b->singlemalloc = PETSC_FALSE; 3075 b->free_a = PETSC_TRUE; 3076 b->free_ij = PETSC_TRUE; 3077 3078 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 3079 3080 b->j = uj; 3081 b->i = ui; 3082 b->diag = 0; 3083 b->ilen = 0; 3084 b->imax = 0; 3085 b->row = perm; 3086 b->col = perm; 3087 3088 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3089 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3090 3091 b->icol = iperm; 3092 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3093 3094 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 3095 ierr = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3096 b->maxnz = b->nz = ui[am]; 3097 3098 fact->info.factor_mallocs = reallocs; 3099 fact->info.fill_ratio_given = fill; 3100 if (ai[am] != 0) { 3101 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 3102 } else { 3103 fact->info.fill_ratio_needed = 0.0; 3104 } 3105 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3106 PetscFunctionReturn(0); 3107 } 3108 3109 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 3110 { 3111 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3112 PetscErrorCode ierr; 3113 PetscInt n = A->rmap->n; 3114 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 3115 PetscScalar *x,sum; 3116 const PetscScalar *b; 3117 const MatScalar *aa = a->a,*v; 3118 PetscInt i,nz; 3119 3120 PetscFunctionBegin; 3121 if (!n) PetscFunctionReturn(0); 3122 3123 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3124 ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr); 3125 3126 /* forward solve the lower triangular */ 3127 x[0] = b[0]; 3128 v = aa; 3129 vi = aj; 3130 for (i=1; i<n; i++) { 3131 nz = ai[i+1] - ai[i]; 3132 sum = b[i]; 3133 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3134 v += nz; 3135 vi += nz; 3136 x[i] = sum; 3137 } 3138 3139 /* backward solve the upper triangular */ 3140 for (i=n-1; i>=0; i--) { 3141 v = aa + adiag[i+1] + 1; 3142 vi = aj + adiag[i+1] + 1; 3143 nz = adiag[i] - adiag[i+1]-1; 3144 sum = x[i]; 3145 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3146 x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3147 } 3148 3149 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 3150 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3151 ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr); 3152 PetscFunctionReturn(0); 3153 } 3154 3155 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 3156 { 3157 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3158 IS iscol = a->col,isrow = a->row; 3159 PetscErrorCode ierr; 3160 PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz; 3161 const PetscInt *rout,*cout,*r,*c; 3162 PetscScalar *x,*tmp,sum; 3163 const PetscScalar *b; 3164 const MatScalar *aa = a->a,*v; 3165 3166 PetscFunctionBegin; 3167 if (!n) PetscFunctionReturn(0); 3168 3169 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3170 ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr); 3171 tmp = a->solve_work; 3172 3173 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 3174 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 3175 3176 /* forward solve the lower triangular */ 3177 tmp[0] = b[r[0]]; 3178 v = aa; 3179 vi = aj; 3180 for (i=1; i<n; i++) { 3181 nz = ai[i+1] - ai[i]; 3182 sum = b[r[i]]; 3183 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3184 tmp[i] = sum; 3185 v += nz; vi += nz; 3186 } 3187 3188 /* backward solve the upper triangular */ 3189 for (i=n-1; i>=0; i--) { 3190 v = aa + adiag[i+1]+1; 3191 vi = aj + adiag[i+1]+1; 3192 nz = adiag[i]-adiag[i+1]-1; 3193 sum = tmp[i]; 3194 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3195 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 3196 } 3197 3198 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 3199 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 3200 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3201 ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr); 3202 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 3203 PetscFunctionReturn(0); 3204 } 3205 3206 /* 3207 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3208 */ 3209 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 3210 { 3211 Mat B = *fact; 3212 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 3213 IS isicol; 3214 PetscErrorCode ierr; 3215 const PetscInt *r,*ic; 3216 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 3217 PetscInt *bi,*bj,*bdiag,*bdiag_rev; 3218 PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au; 3219 PetscInt nlnk,*lnk; 3220 PetscBT lnkbt; 3221 PetscBool row_identity,icol_identity; 3222 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp; 3223 const PetscInt *ics; 3224 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 3225 PetscReal dt =info->dt,shift=info->shiftamount; 3226 PetscInt dtcount=(PetscInt)info->dtcount,nnz_max; 3227 PetscBool missing; 3228 3229 PetscFunctionBegin; 3230 if (dt == PETSC_DEFAULT) dt = 0.005; 3231 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); 3232 3233 /* ------- symbolic factorization, can be reused ---------*/ 3234 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 3235 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 3236 adiag=a->diag; 3237 3238 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 3239 3240 /* bdiag is location of diagonal in factor */ 3241 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); /* becomes b->diag */ 3242 ierr = PetscMalloc1(n+1,&bdiag_rev);CHKERRQ(ierr); /* temporary */ 3243 3244 /* allocate row pointers bi */ 3245 ierr = PetscMalloc1(2*n+2,&bi);CHKERRQ(ierr); 3246 3247 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3248 if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */ 3249 nnz_max = ai[n]+2*n*dtcount+2; 3250 3251 ierr = PetscMalloc1(nnz_max+1,&bj);CHKERRQ(ierr); 3252 ierr = PetscMalloc1(nnz_max+1,&ba);CHKERRQ(ierr); 3253 3254 /* put together the new matrix */ 3255 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 3256 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 3257 b = (Mat_SeqAIJ*)B->data; 3258 3259 b->free_a = PETSC_TRUE; 3260 b->free_ij = PETSC_TRUE; 3261 b->singlemalloc = PETSC_FALSE; 3262 3263 b->a = ba; 3264 b->j = bj; 3265 b->i = bi; 3266 b->diag = bdiag; 3267 b->ilen = 0; 3268 b->imax = 0; 3269 b->row = isrow; 3270 b->col = iscol; 3271 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3272 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3273 b->icol = isicol; 3274 3275 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 3276 ierr = PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3277 b->maxnz = nnz_max; 3278 3279 B->factortype = MAT_FACTOR_ILUDT; 3280 B->info.factor_mallocs = 0; 3281 B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 3282 /* ------- end of symbolic factorization ---------*/ 3283 3284 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3285 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3286 ics = ic; 3287 3288 /* linked list for storing column indices of the active row */ 3289 nlnk = n + 1; 3290 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3291 3292 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3293 ierr = PetscMalloc2(n,&im,n,&jtmp);CHKERRQ(ierr); 3294 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3295 ierr = PetscMalloc2(n,&rtmp,n,&vtmp);CHKERRQ(ierr); 3296 ierr = PetscArrayzero(rtmp,n);CHKERRQ(ierr); 3297 3298 bi[0] = 0; 3299 bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */ 3300 bdiag_rev[n] = bdiag[0]; 3301 bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */ 3302 for (i=0; i<n; i++) { 3303 /* copy initial fill into linked list */ 3304 nzi = ai[r[i]+1] - ai[r[i]]; 3305 if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 3306 nzi_al = adiag[r[i]] - ai[r[i]]; 3307 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 3308 ajtmp = aj + ai[r[i]]; 3309 ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3310 3311 /* load in initial (unfactored row) */ 3312 aatmp = a->a + ai[r[i]]; 3313 for (j=0; j<nzi; j++) { 3314 rtmp[ics[*ajtmp++]] = *aatmp++; 3315 } 3316 3317 /* add pivot rows into linked list */ 3318 row = lnk[n]; 3319 while (row < i) { 3320 nzi_bl = bi[row+1] - bi[row] + 1; 3321 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 3322 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 3323 nzi += nlnk; 3324 row = lnk[row]; 3325 } 3326 3327 /* copy data from lnk into jtmp, then initialize lnk */ 3328 ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 3329 3330 /* numerical factorization */ 3331 bjtmp = jtmp; 3332 row = *bjtmp++; /* 1st pivot row */ 3333 while (row < i) { 3334 pc = rtmp + row; 3335 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3336 multiplier = (*pc) * (*pv); 3337 *pc = multiplier; 3338 if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 3339 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3340 pv = ba + bdiag[row+1] + 1; 3341 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3342 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3343 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3344 } 3345 row = *bjtmp++; 3346 } 3347 3348 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3349 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3350 nzi_bl = 0; j = 0; 3351 while (jtmp[j] < i) { /* Note: jtmp is sorted */ 3352 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3353 nzi_bl++; j++; 3354 } 3355 nzi_bu = nzi - nzi_bl -1; 3356 while (j < nzi) { 3357 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3358 j++; 3359 } 3360 3361 bjtmp = bj + bi[i]; 3362 batmp = ba + bi[i]; 3363 /* apply level dropping rule to L part */ 3364 ncut = nzi_al + dtcount; 3365 if (ncut < nzi_bl) { 3366 ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 3367 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 3368 } else { 3369 ncut = nzi_bl; 3370 } 3371 for (j=0; j<ncut; j++) { 3372 bjtmp[j] = jtmp[j]; 3373 batmp[j] = vtmp[j]; 3374 } 3375 bi[i+1] = bi[i] + ncut; 3376 nzi = ncut + 1; 3377 3378 /* apply level dropping rule to U part */ 3379 ncut = nzi_au + dtcount; 3380 if (ncut < nzi_bu) { 3381 ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 3382 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 3383 } else { 3384 ncut = nzi_bu; 3385 } 3386 nzi += ncut; 3387 3388 /* mark bdiagonal */ 3389 bdiag[i+1] = bdiag[i] - (ncut + 1); 3390 bdiag_rev[n-i-1] = bdiag[i+1]; 3391 bi[2*n - i] = bi[2*n - i +1] - (ncut + 1); 3392 bjtmp = bj + bdiag[i]; 3393 batmp = ba + bdiag[i]; 3394 *bjtmp = i; 3395 *batmp = diag_tmp; /* rtmp[i]; */ 3396 if (*batmp == 0.0) { 3397 *batmp = dt+shift; 3398 } 3399 *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */ 3400 3401 bjtmp = bj + bdiag[i+1]+1; 3402 batmp = ba + bdiag[i+1]+1; 3403 for (k=0; k<ncut; k++) { 3404 bjtmp[k] = jtmp[nzi_bl+1+k]; 3405 batmp[k] = vtmp[nzi_bl+1+k]; 3406 } 3407 3408 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3409 } /* for (i=0; i<n; i++) */ 3410 if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 3411 3412 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3413 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3414 3415 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3416 ierr = PetscFree2(im,jtmp);CHKERRQ(ierr); 3417 ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr); 3418 ierr = PetscFree(bdiag_rev);CHKERRQ(ierr); 3419 3420 ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 3421 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3422 3423 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3424 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 3425 if (row_identity && icol_identity) { 3426 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3427 } else { 3428 B->ops->solve = MatSolve_SeqAIJ; 3429 } 3430 3431 B->ops->solveadd = 0; 3432 B->ops->solvetranspose = 0; 3433 B->ops->solvetransposeadd = 0; 3434 B->ops->matsolve = 0; 3435 B->assembled = PETSC_TRUE; 3436 B->preallocated = PETSC_TRUE; 3437 PetscFunctionReturn(0); 3438 } 3439 3440 /* a wraper of MatILUDTFactor_SeqAIJ() */ 3441 /* 3442 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3443 */ 3444 3445 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 3446 { 3447 PetscErrorCode ierr; 3448 3449 PetscFunctionBegin; 3450 ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr); 3451 PetscFunctionReturn(0); 3452 } 3453 3454 /* 3455 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3456 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3457 */ 3458 /* 3459 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3460 */ 3461 3462 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 3463 { 3464 Mat C =fact; 3465 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 3466 IS isrow = b->row,isicol = b->icol; 3467 PetscErrorCode ierr; 3468 const PetscInt *r,*ic,*ics; 3469 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 3470 PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 3471 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 3472 PetscReal dt=info->dt,shift=info->shiftamount; 3473 PetscBool row_identity, col_identity; 3474 3475 PetscFunctionBegin; 3476 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3477 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3478 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 3479 ics = ic; 3480 3481 for (i=0; i<n; i++) { 3482 /* initialize rtmp array */ 3483 nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 3484 bjtmp = bj + bi[i]; 3485 for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 3486 rtmp[i] = 0.0; 3487 nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 3488 bjtmp = bj + bdiag[i+1] + 1; 3489 for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 3490 3491 /* load in initial unfactored row of A */ 3492 nz = ai[r[i]+1] - ai[r[i]]; 3493 ajtmp = aj + ai[r[i]]; 3494 v = aa + ai[r[i]]; 3495 for (j=0; j<nz; j++) { 3496 rtmp[ics[*ajtmp++]] = v[j]; 3497 } 3498 3499 /* numerical factorization */ 3500 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3501 nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 3502 k = 0; 3503 while (k < nzl) { 3504 row = *bjtmp++; 3505 pc = rtmp + row; 3506 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3507 multiplier = (*pc) * (*pv); 3508 *pc = multiplier; 3509 if (PetscAbsScalar(multiplier) > dt) { 3510 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3511 pv = b->a + bdiag[row+1] + 1; 3512 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3513 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3514 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3515 } 3516 k++; 3517 } 3518 3519 /* finished row so stick it into b->a */ 3520 /* L-part */ 3521 pv = b->a + bi[i]; 3522 pj = bj + bi[i]; 3523 nzl = bi[i+1] - bi[i]; 3524 for (j=0; j<nzl; j++) { 3525 pv[j] = rtmp[pj[j]]; 3526 } 3527 3528 /* diagonal: invert diagonal entries for simplier triangular solves */ 3529 if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 3530 b->a[bdiag[i]] = 1.0/rtmp[i]; 3531 3532 /* U-part */ 3533 pv = b->a + bdiag[i+1] + 1; 3534 pj = bj + bdiag[i+1] + 1; 3535 nzu = bdiag[i] - bdiag[i+1] - 1; 3536 for (j=0; j<nzu; j++) { 3537 pv[j] = rtmp[pj[j]]; 3538 } 3539 } 3540 3541 ierr = PetscFree(rtmp);CHKERRQ(ierr); 3542 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3543 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3544 3545 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3546 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 3547 if (row_identity && col_identity) { 3548 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3549 } else { 3550 C->ops->solve = MatSolve_SeqAIJ; 3551 } 3552 C->ops->solveadd = 0; 3553 C->ops->solvetranspose = 0; 3554 C->ops->solvetransposeadd = 0; 3555 C->ops->matsolve = 0; 3556 C->assembled = PETSC_TRUE; 3557 C->preallocated = PETSC_TRUE; 3558 3559 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 3560 PetscFunctionReturn(0); 3561 } 3562