1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscbt.h> 5 #include <../src/mat/utils/freespace.h> 6 7 /* 8 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 9 10 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 11 */ 12 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 13 { 14 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data; 15 PetscErrorCode ierr; 16 PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order; 17 const PetscInt *ai = a->i, *aj = a->j; 18 const PetscScalar *aa = a->a; 19 PetscBool *done; 20 PetscReal best,past = 0,future; 21 22 PetscFunctionBegin; 23 /* pick initial row */ 24 best = -1; 25 for (i=0; i<n; i++) { 26 future = 0.0; 27 for (j=ai[i]; j<ai[i+1]; j++) { 28 if (aj[j] != i) future += PetscAbsScalar(aa[j]); 29 else past = PetscAbsScalar(aa[j]); 30 } 31 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 32 if (past/future > best) { 33 best = past/future; 34 current = i; 35 } 36 } 37 38 ierr = PetscMalloc1(n,&done);CHKERRQ(ierr); 39 ierr = PetscArrayzero(done,n);CHKERRQ(ierr); 40 ierr = PetscMalloc1(n,&order);CHKERRQ(ierr); 41 order[0] = current; 42 for (i=0; i<n-1; i++) { 43 done[current] = PETSC_TRUE; 44 best = -1; 45 /* loop over all neighbors of current pivot */ 46 for (j=ai[current]; j<ai[current+1]; j++) { 47 jj = aj[j]; 48 if (done[jj]) continue; 49 /* loop over columns of potential next row computing weights for below and above diagonal */ 50 past = future = 0.0; 51 for (k=ai[jj]; k<ai[jj+1]; k++) { 52 kk = aj[k]; 53 if (done[kk]) past += PetscAbsScalar(aa[k]); 54 else if (kk != jj) future += PetscAbsScalar(aa[k]); 55 } 56 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 57 if (past/future > best) { 58 best = past/future; 59 newcurrent = jj; 60 } 61 } 62 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 63 best = -1; 64 for (k=0; k<n; k++) { 65 if (done[k]) continue; 66 future = 0.0; 67 past = 0.0; 68 for (j=ai[k]; j<ai[k+1]; j++) { 69 kk = aj[j]; 70 if (done[kk]) past += PetscAbsScalar(aa[j]); 71 else if (kk != k) future += PetscAbsScalar(aa[j]); 72 } 73 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 74 if (past/future > best) { 75 best = past/future; 76 newcurrent = k; 77 } 78 } 79 } 80 if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current"); 81 current = newcurrent; 82 order[i+1] = current; 83 } 84 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr); 85 *icol = *irow; 86 ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr); 87 ierr = PetscFree(done);CHKERRQ(ierr); 88 ierr = PetscFree(order);CHKERRQ(ierr); 89 PetscFunctionReturn(0); 90 } 91 92 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 93 { 94 PetscInt n = A->rmap->n; 95 PetscErrorCode ierr; 96 97 PetscFunctionBegin; 98 #if defined(PETSC_USE_COMPLEX) 99 if (A->hermitian && !A->symmetric && (ftype == MAT_FACTOR_CHOLESKY||ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY or ICC Factor is not supported"); 100 #endif 101 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 102 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 103 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 104 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 105 106 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 107 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 108 109 ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr); 110 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 111 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 112 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 113 114 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 115 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 116 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 117 (*B)->factortype = ftype; 118 119 ierr = PetscFree((*B)->solvertype);CHKERRQ(ierr); 120 ierr = PetscStrallocpy(MATSOLVERPETSC,&(*B)->solvertype);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 125 { 126 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 127 IS isicol; 128 PetscErrorCode ierr; 129 const PetscInt *r,*ic; 130 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 131 PetscInt *bi,*bj,*ajtmp; 132 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 133 PetscReal f; 134 PetscInt nlnk,*lnk,k,**bi_ptr; 135 PetscFreeSpaceList free_space=NULL,current_space=NULL; 136 PetscBT lnkbt; 137 PetscBool missing; 138 139 PetscFunctionBegin; 140 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 141 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 142 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 143 144 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 145 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 146 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 147 148 /* get new row pointers */ 149 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 150 bi[0] = 0; 151 152 /* bdiag is location of diagonal in factor */ 153 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 154 bdiag[0] = 0; 155 156 /* linked list for storing column indices of the active row */ 157 nlnk = n + 1; 158 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 159 160 ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr); 161 162 /* initial FreeSpace size is f*(ai[n]+1) */ 163 f = info->fill; 164 if (n==1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 165 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); 166 current_space = free_space; 167 168 for (i=0; i<n; i++) { 169 /* copy previous fill into linked list */ 170 nzi = 0; 171 nnz = ai[r[i]+1] - ai[r[i]]; 172 ajtmp = aj + ai[r[i]]; 173 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 174 nzi += nlnk; 175 176 /* add pivot rows into linked list */ 177 row = lnk[n]; 178 while (row < i) { 179 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 180 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 181 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 182 nzi += nlnk; 183 row = lnk[row]; 184 } 185 bi[i+1] = bi[i] + nzi; 186 im[i] = nzi; 187 188 /* mark bdiag */ 189 nzbd = 0; 190 nnz = nzi; 191 k = lnk[n]; 192 while (nnz-- && k < i) { 193 nzbd++; 194 k = lnk[k]; 195 } 196 bdiag[i] = bi[i] + nzbd; 197 198 /* if free space is not available, make more free space */ 199 if (current_space->local_remaining<nzi) { 200 nnz = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */ 201 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 202 reallocs++; 203 } 204 205 /* copy data into free space, then initialize lnk */ 206 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 207 208 bi_ptr[i] = current_space->array; 209 current_space->array += nzi; 210 current_space->local_used += nzi; 211 current_space->local_remaining -= nzi; 212 } 213 #if defined(PETSC_USE_INFO) 214 if (ai[n] != 0) { 215 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 216 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 217 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 218 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); 219 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 220 } else { 221 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 222 } 223 #endif 224 225 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 226 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 227 228 /* destroy list of free space and other temporary array(s) */ 229 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 230 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 231 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 232 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 233 234 /* put together the new matrix */ 235 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 236 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 237 b = (Mat_SeqAIJ*)(B)->data; 238 239 b->free_a = PETSC_TRUE; 240 b->free_ij = PETSC_TRUE; 241 b->singlemalloc = PETSC_FALSE; 242 243 ierr = PetscMalloc1(bi[n]+1,&b->a);CHKERRQ(ierr); 244 b->j = bj; 245 b->i = bi; 246 b->diag = bdiag; 247 b->ilen = 0; 248 b->imax = 0; 249 b->row = isrow; 250 b->col = iscol; 251 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 252 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 253 b->icol = isicol; 254 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 255 256 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 257 ierr = PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 258 b->maxnz = b->nz = bi[n]; 259 260 (B)->factortype = MAT_FACTOR_LU; 261 (B)->info.factor_mallocs = reallocs; 262 (B)->info.fill_ratio_given = f; 263 264 if (ai[n]) { 265 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 266 } else { 267 (B)->info.fill_ratio_needed = 0.0; 268 } 269 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 270 if (a->inode.size) { 271 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 272 } 273 PetscFunctionReturn(0); 274 } 275 276 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 277 { 278 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 279 IS isicol; 280 PetscErrorCode ierr; 281 const PetscInt *r,*ic,*ai=a->i,*aj=a->j,*ajtmp; 282 PetscInt i,n=A->rmap->n; 283 PetscInt *bi,*bj; 284 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 285 PetscReal f; 286 PetscInt nlnk,*lnk,k,**bi_ptr; 287 PetscFreeSpaceList free_space=NULL,current_space=NULL; 288 PetscBT lnkbt; 289 PetscBool missing; 290 291 PetscFunctionBegin; 292 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 293 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 294 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 295 296 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 297 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 298 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 299 300 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 301 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 302 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 303 bi[0] = bdiag[0] = 0; 304 305 /* linked list for storing column indices of the active row */ 306 nlnk = n + 1; 307 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 308 309 ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr); 310 311 /* initial FreeSpace size is f*(ai[n]+1) */ 312 f = info->fill; 313 if (n==1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 314 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); 315 current_space = free_space; 316 317 for (i=0; i<n; i++) { 318 /* copy previous fill into linked list */ 319 nzi = 0; 320 nnz = ai[r[i]+1] - ai[r[i]]; 321 ajtmp = aj + ai[r[i]]; 322 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 323 nzi += nlnk; 324 325 /* add pivot rows into linked list */ 326 row = lnk[n]; 327 while (row < i) { 328 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 329 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 330 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 331 nzi += nlnk; 332 row = lnk[row]; 333 } 334 bi[i+1] = bi[i] + nzi; 335 im[i] = nzi; 336 337 /* mark bdiag */ 338 nzbd = 0; 339 nnz = nzi; 340 k = lnk[n]; 341 while (nnz-- && k < i) { 342 nzbd++; 343 k = lnk[k]; 344 } 345 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 346 347 /* if free space is not available, make more free space */ 348 if (current_space->local_remaining<nzi) { 349 /* estimated additional space needed */ 350 nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(n-1,nzi)); 351 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 352 reallocs++; 353 } 354 355 /* copy data into free space, then initialize lnk */ 356 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 357 358 bi_ptr[i] = current_space->array; 359 current_space->array += nzi; 360 current_space->local_used += nzi; 361 current_space->local_remaining -= nzi; 362 } 363 364 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 365 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 366 367 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 368 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 369 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 370 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 371 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 372 373 /* put together the new matrix */ 374 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 375 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 376 b = (Mat_SeqAIJ*)(B)->data; 377 378 b->free_a = PETSC_TRUE; 379 b->free_ij = PETSC_TRUE; 380 b->singlemalloc = PETSC_FALSE; 381 382 ierr = PetscMalloc1(bdiag[0]+1,&b->a);CHKERRQ(ierr); 383 384 b->j = bj; 385 b->i = bi; 386 b->diag = bdiag; 387 b->ilen = 0; 388 b->imax = 0; 389 b->row = isrow; 390 b->col = iscol; 391 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 392 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 393 b->icol = isicol; 394 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 395 396 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 397 ierr = PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 398 b->maxnz = b->nz = bdiag[0]+1; 399 400 B->factortype = MAT_FACTOR_LU; 401 B->info.factor_mallocs = reallocs; 402 B->info.fill_ratio_given = f; 403 404 if (ai[n]) { 405 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 406 } else { 407 B->info.fill_ratio_needed = 0.0; 408 } 409 #if defined(PETSC_USE_INFO) 410 if (ai[n] != 0) { 411 PetscReal af = B->info.fill_ratio_needed; 412 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 413 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 414 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); 415 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 416 } else { 417 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 418 } 419 #endif 420 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 421 if (a->inode.size) { 422 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 423 } 424 ierr = MatSeqAIJCheckInode_FactorLU(B);CHKERRQ(ierr); 425 PetscFunctionReturn(0); 426 } 427 428 /* 429 Trouble in factorization, should we dump the original matrix? 430 */ 431 PetscErrorCode MatFactorDumpMatrix(Mat A) 432 { 433 PetscErrorCode ierr; 434 PetscBool flg = PETSC_FALSE; 435 436 PetscFunctionBegin; 437 ierr = PetscOptionsGetBool(((PetscObject)A)->options,NULL,"-mat_factor_dump_on_error",&flg,NULL);CHKERRQ(ierr); 438 if (flg) { 439 PetscViewer viewer; 440 char filename[PETSC_MAX_PATH_LEN]; 441 442 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 443 ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 444 ierr = MatView(A,viewer);CHKERRQ(ierr); 445 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 446 } 447 PetscFunctionReturn(0); 448 } 449 450 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 451 { 452 Mat C =B; 453 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 454 IS isrow = b->row,isicol = b->icol; 455 PetscErrorCode ierr; 456 const PetscInt *r,*ic,*ics; 457 const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag; 458 PetscInt i,j,k,nz,nzL,row,*pj; 459 const PetscInt *ajtmp,*bjtmp; 460 MatScalar *rtmp,*pc,multiplier,*pv; 461 const MatScalar *aa=a->a,*v; 462 PetscBool row_identity,col_identity; 463 FactorShiftCtx sctx; 464 const PetscInt *ddiag; 465 PetscReal rs; 466 MatScalar d; 467 468 PetscFunctionBegin; 469 /* MatPivotSetUp(): initialize shift context sctx */ 470 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 471 472 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 473 ddiag = a->diag; 474 sctx.shift_top = info->zeropivot; 475 for (i=0; i<n; i++) { 476 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 477 d = (aa)[ddiag[i]]; 478 rs = -PetscAbsScalar(d) - PetscRealPart(d); 479 v = aa+ai[i]; 480 nz = ai[i+1] - ai[i]; 481 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 482 if (rs>sctx.shift_top) sctx.shift_top = rs; 483 } 484 sctx.shift_top *= 1.1; 485 sctx.nshift_max = 5; 486 sctx.shift_lo = 0.; 487 sctx.shift_hi = 1.; 488 } 489 490 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 491 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 492 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 493 ics = ic; 494 495 do { 496 sctx.newshift = PETSC_FALSE; 497 for (i=0; i<n; i++) { 498 /* zero rtmp */ 499 /* L part */ 500 nz = bi[i+1] - bi[i]; 501 bjtmp = bj + bi[i]; 502 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 503 504 /* U part */ 505 nz = bdiag[i]-bdiag[i+1]; 506 bjtmp = bj + bdiag[i+1]+1; 507 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 508 509 /* load in initial (unfactored row) */ 510 nz = ai[r[i]+1] - ai[r[i]]; 511 ajtmp = aj + ai[r[i]]; 512 v = aa + ai[r[i]]; 513 for (j=0; j<nz; j++) { 514 rtmp[ics[ajtmp[j]]] = v[j]; 515 } 516 /* ZeropivotApply() */ 517 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 518 519 /* elimination */ 520 bjtmp = bj + bi[i]; 521 row = *bjtmp++; 522 nzL = bi[i+1] - bi[i]; 523 for (k=0; k < nzL; k++) { 524 pc = rtmp + row; 525 if (*pc != 0.0) { 526 pv = b->a + bdiag[row]; 527 multiplier = *pc * (*pv); 528 *pc = multiplier; 529 530 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 531 pv = b->a + bdiag[row+1]+1; 532 nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */ 533 534 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 535 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 536 } 537 row = *bjtmp++; 538 } 539 540 /* finished row so stick it into b->a */ 541 rs = 0.0; 542 /* L part */ 543 pv = b->a + bi[i]; 544 pj = b->j + bi[i]; 545 nz = bi[i+1] - bi[i]; 546 for (j=0; j<nz; j++) { 547 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 548 } 549 550 /* U part */ 551 pv = b->a + bdiag[i+1]+1; 552 pj = b->j + bdiag[i+1]+1; 553 nz = bdiag[i] - bdiag[i+1]-1; 554 for (j=0; j<nz; j++) { 555 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 556 } 557 558 sctx.rs = rs; 559 sctx.pv = rtmp[i]; 560 ierr = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr); 561 if (sctx.newshift) break; /* break for-loop */ 562 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 563 564 /* Mark diagonal and invert diagonal for simplier triangular solves */ 565 pv = b->a + bdiag[i]; 566 *pv = 1.0/rtmp[i]; 567 568 } /* endof for (i=0; i<n; i++) { */ 569 570 /* MatPivotRefine() */ 571 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 572 /* 573 * if no shift in this attempt & shifting & started shifting & can refine, 574 * then try lower shift 575 */ 576 sctx.shift_hi = sctx.shift_fraction; 577 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 578 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 579 sctx.newshift = PETSC_TRUE; 580 sctx.nshift++; 581 } 582 } while (sctx.newshift); 583 584 ierr = PetscFree(rtmp);CHKERRQ(ierr); 585 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 586 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 587 588 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 589 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 590 if (b->inode.size) { 591 C->ops->solve = MatSolve_SeqAIJ_Inode; 592 } else if (row_identity && col_identity) { 593 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 594 } else { 595 C->ops->solve = MatSolve_SeqAIJ; 596 } 597 C->ops->solveadd = MatSolveAdd_SeqAIJ; 598 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 599 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 600 C->ops->matsolve = MatMatSolve_SeqAIJ; 601 C->assembled = PETSC_TRUE; 602 C->preallocated = PETSC_TRUE; 603 604 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 605 606 /* MatShiftView(A,info,&sctx) */ 607 if (sctx.nshift) { 608 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 609 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 610 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 611 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 612 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 613 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr); 614 } 615 } 616 PetscFunctionReturn(0); 617 } 618 619 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 620 { 621 Mat C =B; 622 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 623 IS isrow = b->row,isicol = b->icol; 624 PetscErrorCode ierr; 625 const PetscInt *r,*ic,*ics; 626 PetscInt nz,row,i,j,n=A->rmap->n,diag; 627 const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 628 const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj; 629 MatScalar *pv,*rtmp,*pc,multiplier,d; 630 const MatScalar *v,*aa=a->a; 631 PetscReal rs=0.0; 632 FactorShiftCtx sctx; 633 const PetscInt *ddiag; 634 PetscBool row_identity, col_identity; 635 636 PetscFunctionBegin; 637 /* MatPivotSetUp(): initialize shift context sctx */ 638 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 639 640 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 641 ddiag = a->diag; 642 sctx.shift_top = info->zeropivot; 643 for (i=0; i<n; i++) { 644 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 645 d = (aa)[ddiag[i]]; 646 rs = -PetscAbsScalar(d) - PetscRealPart(d); 647 v = aa+ai[i]; 648 nz = ai[i+1] - ai[i]; 649 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 650 if (rs>sctx.shift_top) sctx.shift_top = rs; 651 } 652 sctx.shift_top *= 1.1; 653 sctx.nshift_max = 5; 654 sctx.shift_lo = 0.; 655 sctx.shift_hi = 1.; 656 } 657 658 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 659 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 660 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 661 ics = ic; 662 663 do { 664 sctx.newshift = PETSC_FALSE; 665 for (i=0; i<n; i++) { 666 nz = bi[i+1] - bi[i]; 667 bjtmp = bj + bi[i]; 668 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 669 670 /* load in initial (unfactored row) */ 671 nz = ai[r[i]+1] - ai[r[i]]; 672 ajtmp = aj + ai[r[i]]; 673 v = aa + ai[r[i]]; 674 for (j=0; j<nz; j++) { 675 rtmp[ics[ajtmp[j]]] = v[j]; 676 } 677 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 678 679 row = *bjtmp++; 680 while (row < i) { 681 pc = rtmp + row; 682 if (*pc != 0.0) { 683 pv = b->a + diag_offset[row]; 684 pj = b->j + diag_offset[row] + 1; 685 multiplier = *pc / *pv++; 686 *pc = multiplier; 687 nz = bi[row+1] - diag_offset[row] - 1; 688 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 689 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 690 } 691 row = *bjtmp++; 692 } 693 /* finished row so stick it into b->a */ 694 pv = b->a + bi[i]; 695 pj = b->j + bi[i]; 696 nz = bi[i+1] - bi[i]; 697 diag = diag_offset[i] - bi[i]; 698 rs = 0.0; 699 for (j=0; j<nz; j++) { 700 pv[j] = rtmp[pj[j]]; 701 rs += PetscAbsScalar(pv[j]); 702 } 703 rs -= PetscAbsScalar(pv[diag]); 704 705 sctx.rs = rs; 706 sctx.pv = pv[diag]; 707 ierr = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr); 708 if (sctx.newshift) break; 709 pv[diag] = sctx.pv; 710 } 711 712 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 713 /* 714 * if no shift in this attempt & shifting & started shifting & can refine, 715 * then try lower shift 716 */ 717 sctx.shift_hi = sctx.shift_fraction; 718 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 719 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 720 sctx.newshift = PETSC_TRUE; 721 sctx.nshift++; 722 } 723 } while (sctx.newshift); 724 725 /* invert diagonal entries for simplier triangular solves */ 726 for (i=0; i<n; i++) { 727 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 728 } 729 ierr = PetscFree(rtmp);CHKERRQ(ierr); 730 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 731 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 732 733 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 734 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 735 if (row_identity && col_identity) { 736 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 737 } else { 738 C->ops->solve = MatSolve_SeqAIJ_inplace; 739 } 740 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 741 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 742 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 743 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 744 745 C->assembled = PETSC_TRUE; 746 C->preallocated = PETSC_TRUE; 747 748 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 749 if (sctx.nshift) { 750 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 751 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 752 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 753 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 754 } 755 } 756 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 757 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 758 759 ierr = MatSeqAIJCheckInode(C);CHKERRQ(ierr); 760 PetscFunctionReturn(0); 761 } 762 763 /* 764 This routine implements inplace ILU(0) with row or/and column permutations. 765 Input: 766 A - original matrix 767 Output; 768 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 769 a->j (col index) is permuted by the inverse of colperm, then sorted 770 a->a reordered accordingly with a->j 771 a->diag (ptr to diagonal elements) is updated. 772 */ 773 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 774 { 775 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data; 776 IS isrow = a->row,isicol = a->icol; 777 PetscErrorCode ierr; 778 const PetscInt *r,*ic,*ics; 779 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 780 PetscInt *ajtmp,nz,row; 781 PetscInt *diag = a->diag,nbdiag,*pj; 782 PetscScalar *rtmp,*pc,multiplier,d; 783 MatScalar *pv,*v; 784 PetscReal rs; 785 FactorShiftCtx sctx; 786 const MatScalar *aa=a->a,*vtmp; 787 788 PetscFunctionBegin; 789 if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 790 791 /* MatPivotSetUp(): initialize shift context sctx */ 792 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 793 794 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 795 const PetscInt *ddiag = a->diag; 796 sctx.shift_top = info->zeropivot; 797 for (i=0; i<n; i++) { 798 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 799 d = (aa)[ddiag[i]]; 800 rs = -PetscAbsScalar(d) - PetscRealPart(d); 801 vtmp = aa+ai[i]; 802 nz = ai[i+1] - ai[i]; 803 for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]); 804 if (rs>sctx.shift_top) sctx.shift_top = rs; 805 } 806 sctx.shift_top *= 1.1; 807 sctx.nshift_max = 5; 808 sctx.shift_lo = 0.; 809 sctx.shift_hi = 1.; 810 } 811 812 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 813 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 814 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 815 ierr = PetscArrayzero(rtmp,n+1);CHKERRQ(ierr); 816 ics = ic; 817 818 #if defined(MV) 819 sctx.shift_top = 0.; 820 sctx.nshift_max = 0; 821 sctx.shift_lo = 0.; 822 sctx.shift_hi = 0.; 823 sctx.shift_fraction = 0.; 824 825 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 826 sctx.shift_top = 0.; 827 for (i=0; i<n; i++) { 828 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 829 d = (a->a)[diag[i]]; 830 rs = -PetscAbsScalar(d) - PetscRealPart(d); 831 v = a->a+ai[i]; 832 nz = ai[i+1] - ai[i]; 833 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 834 if (rs>sctx.shift_top) sctx.shift_top = rs; 835 } 836 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 837 sctx.shift_top *= 1.1; 838 sctx.nshift_max = 5; 839 sctx.shift_lo = 0.; 840 sctx.shift_hi = 1.; 841 } 842 843 sctx.shift_amount = 0.; 844 sctx.nshift = 0; 845 #endif 846 847 do { 848 sctx.newshift = PETSC_FALSE; 849 for (i=0; i<n; i++) { 850 /* load in initial unfactored row */ 851 nz = ai[r[i]+1] - ai[r[i]]; 852 ajtmp = aj + ai[r[i]]; 853 v = a->a + ai[r[i]]; 854 /* sort permuted ajtmp and values v accordingly */ 855 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 856 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 857 858 diag[r[i]] = ai[r[i]]; 859 for (j=0; j<nz; j++) { 860 rtmp[ajtmp[j]] = v[j]; 861 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 862 } 863 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 864 865 row = *ajtmp++; 866 while (row < i) { 867 pc = rtmp + row; 868 if (*pc != 0.0) { 869 pv = a->a + diag[r[row]]; 870 pj = aj + diag[r[row]] + 1; 871 872 multiplier = *pc / *pv++; 873 *pc = multiplier; 874 nz = ai[r[row]+1] - diag[r[row]] - 1; 875 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 876 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 877 } 878 row = *ajtmp++; 879 } 880 /* finished row so overwrite it onto a->a */ 881 pv = a->a + ai[r[i]]; 882 pj = aj + ai[r[i]]; 883 nz = ai[r[i]+1] - ai[r[i]]; 884 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 885 886 rs = 0.0; 887 for (j=0; j<nz; j++) { 888 pv[j] = rtmp[pj[j]]; 889 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 890 } 891 892 sctx.rs = rs; 893 sctx.pv = pv[nbdiag]; 894 ierr = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr); 895 if (sctx.newshift) break; 896 pv[nbdiag] = sctx.pv; 897 } 898 899 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 900 /* 901 * if no shift in this attempt & shifting & started shifting & can refine, 902 * then try lower shift 903 */ 904 sctx.shift_hi = sctx.shift_fraction; 905 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 906 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 907 sctx.newshift = PETSC_TRUE; 908 sctx.nshift++; 909 } 910 } while (sctx.newshift); 911 912 /* invert diagonal entries for simplier triangular solves */ 913 for (i=0; i<n; i++) { 914 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 915 } 916 917 ierr = PetscFree(rtmp);CHKERRQ(ierr); 918 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 919 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 920 921 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 922 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 923 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 924 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 925 926 A->assembled = PETSC_TRUE; 927 A->preallocated = PETSC_TRUE; 928 929 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 930 if (sctx.nshift) { 931 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 932 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 933 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 934 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 935 } 936 } 937 PetscFunctionReturn(0); 938 } 939 940 /* ----------------------------------------------------------- */ 941 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 942 { 943 PetscErrorCode ierr; 944 Mat C; 945 946 PetscFunctionBegin; 947 ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 948 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 949 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 950 951 A->ops->solve = C->ops->solve; 952 A->ops->solvetranspose = C->ops->solvetranspose; 953 954 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 955 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 956 PetscFunctionReturn(0); 957 } 958 /* ----------------------------------------------------------- */ 959 960 961 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 962 { 963 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 964 IS iscol = a->col,isrow = a->row; 965 PetscErrorCode ierr; 966 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 967 PetscInt nz; 968 const PetscInt *rout,*cout,*r,*c; 969 PetscScalar *x,*tmp,*tmps,sum; 970 const PetscScalar *b; 971 const MatScalar *aa = a->a,*v; 972 973 PetscFunctionBegin; 974 if (!n) PetscFunctionReturn(0); 975 976 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 977 ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr); 978 tmp = a->solve_work; 979 980 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 981 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 982 983 /* forward solve the lower triangular */ 984 tmp[0] = b[*r++]; 985 tmps = tmp; 986 for (i=1; i<n; i++) { 987 v = aa + ai[i]; 988 vi = aj + ai[i]; 989 nz = a->diag[i] - ai[i]; 990 sum = b[*r++]; 991 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 992 tmp[i] = sum; 993 } 994 995 /* backward solve the upper triangular */ 996 for (i=n-1; i>=0; i--) { 997 v = aa + a->diag[i] + 1; 998 vi = aj + a->diag[i] + 1; 999 nz = ai[i+1] - a->diag[i] - 1; 1000 sum = tmp[i]; 1001 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1002 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 1003 } 1004 1005 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1006 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1007 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1008 ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr); 1009 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1010 PetscFunctionReturn(0); 1011 } 1012 1013 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X) 1014 { 1015 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1016 IS iscol = a->col,isrow = a->row; 1017 PetscErrorCode ierr; 1018 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1019 PetscInt nz,neq; 1020 const PetscInt *rout,*cout,*r,*c; 1021 PetscScalar *x,*tmp,*tmps,sum; 1022 const PetscScalar *aa = a->a,*v; 1023 const PetscScalar *b; 1024 PetscBool bisdense,xisdense; 1025 1026 PetscFunctionBegin; 1027 if (!n) PetscFunctionReturn(0); 1028 1029 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1030 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1031 if (X != B) { 1032 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1033 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1034 } 1035 1036 ierr = MatDenseGetArrayRead(B,&b);CHKERRQ(ierr); 1037 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1038 1039 tmp = a->solve_work; 1040 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1041 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1042 1043 for (neq=0; neq<B->cmap->n; neq++) { 1044 /* forward solve the lower triangular */ 1045 tmp[0] = b[r[0]]; 1046 tmps = tmp; 1047 for (i=1; i<n; i++) { 1048 v = aa + ai[i]; 1049 vi = aj + ai[i]; 1050 nz = a->diag[i] - ai[i]; 1051 sum = b[r[i]]; 1052 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1053 tmp[i] = sum; 1054 } 1055 /* backward solve the upper triangular */ 1056 for (i=n-1; i>=0; i--) { 1057 v = aa + a->diag[i] + 1; 1058 vi = aj + a->diag[i] + 1; 1059 nz = ai[i+1] - a->diag[i] - 1; 1060 sum = tmp[i]; 1061 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1062 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 1063 } 1064 1065 b += n; 1066 x += n; 1067 } 1068 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1069 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1070 ierr = MatDenseRestoreArrayRead(B,&b);CHKERRQ(ierr); 1071 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1072 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1073 PetscFunctionReturn(0); 1074 } 1075 1076 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 1077 { 1078 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1079 IS iscol = a->col,isrow = a->row; 1080 PetscErrorCode ierr; 1081 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1082 PetscInt nz,neq; 1083 const PetscInt *rout,*cout,*r,*c; 1084 PetscScalar *x,*tmp,sum; 1085 const PetscScalar *b; 1086 const PetscScalar *aa = a->a,*v; 1087 PetscBool bisdense,xisdense; 1088 1089 PetscFunctionBegin; 1090 if (!n) PetscFunctionReturn(0); 1091 1092 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1093 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1094 if (X != B) { 1095 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1096 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1097 } 1098 1099 ierr = MatDenseGetArrayRead(B,&b);CHKERRQ(ierr); 1100 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1101 1102 tmp = a->solve_work; 1103 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1104 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1105 1106 for (neq=0; neq<B->cmap->n; neq++) { 1107 /* forward solve the lower triangular */ 1108 tmp[0] = b[r[0]]; 1109 v = aa; 1110 vi = aj; 1111 for (i=1; i<n; i++) { 1112 nz = ai[i+1] - ai[i]; 1113 sum = b[r[i]]; 1114 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1115 tmp[i] = sum; 1116 v += nz; vi += nz; 1117 } 1118 1119 /* backward solve the upper triangular */ 1120 for (i=n-1; i>=0; i--) { 1121 v = aa + adiag[i+1]+1; 1122 vi = aj + adiag[i+1]+1; 1123 nz = adiag[i]-adiag[i+1]-1; 1124 sum = tmp[i]; 1125 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1126 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 1127 } 1128 1129 b += n; 1130 x += n; 1131 } 1132 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1133 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1134 ierr = MatDenseRestoreArrayRead(B,&b);CHKERRQ(ierr); 1135 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1136 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1137 PetscFunctionReturn(0); 1138 } 1139 1140 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 1141 { 1142 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1143 IS iscol = a->col,isrow = a->row; 1144 PetscErrorCode ierr; 1145 const PetscInt *r,*c,*rout,*cout; 1146 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1147 PetscInt nz,row; 1148 PetscScalar *x,*tmp,*tmps,sum; 1149 const PetscScalar *b; 1150 const MatScalar *aa = a->a,*v; 1151 1152 PetscFunctionBegin; 1153 if (!n) PetscFunctionReturn(0); 1154 1155 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1156 ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr); 1157 tmp = a->solve_work; 1158 1159 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1160 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1161 1162 /* forward solve the lower triangular */ 1163 tmp[0] = b[*r++]; 1164 tmps = tmp; 1165 for (row=1; row<n; row++) { 1166 i = rout[row]; /* permuted row */ 1167 v = aa + ai[i]; 1168 vi = aj + ai[i]; 1169 nz = a->diag[i] - ai[i]; 1170 sum = b[*r++]; 1171 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1172 tmp[row] = sum; 1173 } 1174 1175 /* backward solve the upper triangular */ 1176 for (row=n-1; row>=0; row--) { 1177 i = rout[row]; /* permuted row */ 1178 v = aa + a->diag[i] + 1; 1179 vi = aj + a->diag[i] + 1; 1180 nz = ai[i+1] - a->diag[i] - 1; 1181 sum = tmp[row]; 1182 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1183 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 1184 } 1185 1186 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1187 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1188 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1189 ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr); 1190 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1191 PetscFunctionReturn(0); 1192 } 1193 1194 /* ----------------------------------------------------------- */ 1195 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1196 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1197 { 1198 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1199 PetscErrorCode ierr; 1200 PetscInt n = A->rmap->n; 1201 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag; 1202 PetscScalar *x; 1203 const PetscScalar *b; 1204 const MatScalar *aa = a->a; 1205 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1206 PetscInt adiag_i,i,nz,ai_i; 1207 const PetscInt *vi; 1208 const MatScalar *v; 1209 PetscScalar sum; 1210 #endif 1211 1212 PetscFunctionBegin; 1213 if (!n) PetscFunctionReturn(0); 1214 1215 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1216 ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr); 1217 1218 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1219 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 1220 #else 1221 /* forward solve the lower triangular */ 1222 x[0] = b[0]; 1223 for (i=1; i<n; i++) { 1224 ai_i = ai[i]; 1225 v = aa + ai_i; 1226 vi = aj + ai_i; 1227 nz = adiag[i] - ai_i; 1228 sum = b[i]; 1229 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1230 x[i] = sum; 1231 } 1232 1233 /* backward solve the upper triangular */ 1234 for (i=n-1; i>=0; i--) { 1235 adiag_i = adiag[i]; 1236 v = aa + adiag_i + 1; 1237 vi = aj + adiag_i + 1; 1238 nz = ai[i+1] - adiag_i - 1; 1239 sum = x[i]; 1240 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1241 x[i] = sum*aa[adiag_i]; 1242 } 1243 #endif 1244 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1245 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1246 ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr); 1247 PetscFunctionReturn(0); 1248 } 1249 1250 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx) 1251 { 1252 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1253 IS iscol = a->col,isrow = a->row; 1254 PetscErrorCode ierr; 1255 PetscInt i, n = A->rmap->n,j; 1256 PetscInt nz; 1257 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j; 1258 PetscScalar *x,*tmp,sum; 1259 const PetscScalar *b; 1260 const MatScalar *aa = a->a,*v; 1261 1262 PetscFunctionBegin; 1263 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1264 1265 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1266 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1267 tmp = a->solve_work; 1268 1269 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1270 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1271 1272 /* forward solve the lower triangular */ 1273 tmp[0] = b[*r++]; 1274 for (i=1; i<n; i++) { 1275 v = aa + ai[i]; 1276 vi = aj + ai[i]; 1277 nz = a->diag[i] - ai[i]; 1278 sum = b[*r++]; 1279 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1280 tmp[i] = sum; 1281 } 1282 1283 /* backward solve the upper triangular */ 1284 for (i=n-1; i>=0; i--) { 1285 v = aa + a->diag[i] + 1; 1286 vi = aj + a->diag[i] + 1; 1287 nz = ai[i+1] - a->diag[i] - 1; 1288 sum = tmp[i]; 1289 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1290 tmp[i] = sum*aa[a->diag[i]]; 1291 x[*c--] += tmp[i]; 1292 } 1293 1294 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1295 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1296 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1297 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1298 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1299 PetscFunctionReturn(0); 1300 } 1301 1302 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1303 { 1304 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1305 IS iscol = a->col,isrow = a->row; 1306 PetscErrorCode ierr; 1307 PetscInt i, n = A->rmap->n,j; 1308 PetscInt nz; 1309 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1310 PetscScalar *x,*tmp,sum; 1311 const PetscScalar *b; 1312 const MatScalar *aa = a->a,*v; 1313 1314 PetscFunctionBegin; 1315 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1316 1317 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1318 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1319 tmp = a->solve_work; 1320 1321 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1322 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1323 1324 /* forward solve the lower triangular */ 1325 tmp[0] = b[r[0]]; 1326 v = aa; 1327 vi = aj; 1328 for (i=1; i<n; i++) { 1329 nz = ai[i+1] - ai[i]; 1330 sum = b[r[i]]; 1331 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1332 tmp[i] = sum; 1333 v += nz; 1334 vi += nz; 1335 } 1336 1337 /* backward solve the upper triangular */ 1338 v = aa + adiag[n-1]; 1339 vi = aj + adiag[n-1]; 1340 for (i=n-1; i>=0; i--) { 1341 nz = adiag[i] - adiag[i+1] - 1; 1342 sum = tmp[i]; 1343 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1344 tmp[i] = sum*v[nz]; 1345 x[c[i]] += tmp[i]; 1346 v += nz+1; vi += nz+1; 1347 } 1348 1349 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1350 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1351 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1352 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1353 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1354 PetscFunctionReturn(0); 1355 } 1356 1357 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 1358 { 1359 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1360 IS iscol = a->col,isrow = a->row; 1361 PetscErrorCode ierr; 1362 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1363 PetscInt i,n = A->rmap->n,j; 1364 PetscInt nz; 1365 PetscScalar *x,*tmp,s1; 1366 const MatScalar *aa = a->a,*v; 1367 const PetscScalar *b; 1368 1369 PetscFunctionBegin; 1370 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1371 ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr); 1372 tmp = a->solve_work; 1373 1374 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1375 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1376 1377 /* copy the b into temp work space according to permutation */ 1378 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1379 1380 /* forward solve the U^T */ 1381 for (i=0; i<n; i++) { 1382 v = aa + diag[i]; 1383 vi = aj + diag[i] + 1; 1384 nz = ai[i+1] - diag[i] - 1; 1385 s1 = tmp[i]; 1386 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1387 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1388 tmp[i] = s1; 1389 } 1390 1391 /* backward solve the L^T */ 1392 for (i=n-1; i>=0; i--) { 1393 v = aa + diag[i] - 1; 1394 vi = aj + diag[i] - 1; 1395 nz = diag[i] - ai[i]; 1396 s1 = tmp[i]; 1397 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1398 } 1399 1400 /* copy tmp into x according to permutation */ 1401 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1402 1403 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1404 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1405 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1406 ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr); 1407 1408 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1409 PetscFunctionReturn(0); 1410 } 1411 1412 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1413 { 1414 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1415 IS iscol = a->col,isrow = a->row; 1416 PetscErrorCode ierr; 1417 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1418 PetscInt i,n = A->rmap->n,j; 1419 PetscInt nz; 1420 PetscScalar *x,*tmp,s1; 1421 const MatScalar *aa = a->a,*v; 1422 const PetscScalar *b; 1423 1424 PetscFunctionBegin; 1425 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1426 ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr); 1427 tmp = a->solve_work; 1428 1429 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1430 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1431 1432 /* copy the b into temp work space according to permutation */ 1433 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1434 1435 /* forward solve the U^T */ 1436 for (i=0; i<n; i++) { 1437 v = aa + adiag[i+1] + 1; 1438 vi = aj + adiag[i+1] + 1; 1439 nz = adiag[i] - adiag[i+1] - 1; 1440 s1 = tmp[i]; 1441 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1442 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1443 tmp[i] = s1; 1444 } 1445 1446 /* backward solve the L^T */ 1447 for (i=n-1; i>=0; i--) { 1448 v = aa + ai[i]; 1449 vi = aj + ai[i]; 1450 nz = ai[i+1] - ai[i]; 1451 s1 = tmp[i]; 1452 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1453 } 1454 1455 /* copy tmp into x according to permutation */ 1456 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1457 1458 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1459 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1460 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1461 ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr); 1462 1463 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1464 PetscFunctionReturn(0); 1465 } 1466 1467 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx) 1468 { 1469 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1470 IS iscol = a->col,isrow = a->row; 1471 PetscErrorCode ierr; 1472 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1473 PetscInt i,n = A->rmap->n,j; 1474 PetscInt nz; 1475 PetscScalar *x,*tmp,s1; 1476 const MatScalar *aa = a->a,*v; 1477 const PetscScalar *b; 1478 1479 PetscFunctionBegin; 1480 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1481 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1482 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1483 tmp = a->solve_work; 1484 1485 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1486 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1487 1488 /* copy the b into temp work space according to permutation */ 1489 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1490 1491 /* forward solve the U^T */ 1492 for (i=0; i<n; i++) { 1493 v = aa + diag[i]; 1494 vi = aj + diag[i] + 1; 1495 nz = ai[i+1] - diag[i] - 1; 1496 s1 = tmp[i]; 1497 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1498 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1499 tmp[i] = s1; 1500 } 1501 1502 /* backward solve the L^T */ 1503 for (i=n-1; i>=0; i--) { 1504 v = aa + diag[i] - 1; 1505 vi = aj + diag[i] - 1; 1506 nz = diag[i] - ai[i]; 1507 s1 = tmp[i]; 1508 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1509 } 1510 1511 /* copy tmp into x according to permutation */ 1512 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1513 1514 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1515 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1516 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1517 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1518 1519 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1520 PetscFunctionReturn(0); 1521 } 1522 1523 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1524 { 1525 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1526 IS iscol = a->col,isrow = a->row; 1527 PetscErrorCode ierr; 1528 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1529 PetscInt i,n = A->rmap->n,j; 1530 PetscInt nz; 1531 PetscScalar *x,*tmp,s1; 1532 const MatScalar *aa = a->a,*v; 1533 const PetscScalar *b; 1534 1535 PetscFunctionBegin; 1536 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1537 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1538 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1539 tmp = a->solve_work; 1540 1541 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1542 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1543 1544 /* copy the b into temp work space according to permutation */ 1545 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1546 1547 /* forward solve the U^T */ 1548 for (i=0; i<n; i++) { 1549 v = aa + adiag[i+1] + 1; 1550 vi = aj + adiag[i+1] + 1; 1551 nz = adiag[i] - adiag[i+1] - 1; 1552 s1 = tmp[i]; 1553 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1554 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1555 tmp[i] = s1; 1556 } 1557 1558 1559 /* backward solve the L^T */ 1560 for (i=n-1; i>=0; i--) { 1561 v = aa + ai[i]; 1562 vi = aj + ai[i]; 1563 nz = ai[i+1] - ai[i]; 1564 s1 = tmp[i]; 1565 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1566 } 1567 1568 /* copy tmp into x according to permutation */ 1569 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1570 1571 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1572 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1573 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1574 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1575 1576 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1577 PetscFunctionReturn(0); 1578 } 1579 1580 /* ----------------------------------------------------------------*/ 1581 1582 /* 1583 ilu() under revised new data structure. 1584 Factored arrays bj and ba are stored as 1585 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1586 1587 bi=fact->i is an array of size n+1, in which 1588 bi+ 1589 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1590 bi[n]: points to L(n-1,n-1)+1 1591 1592 bdiag=fact->diag is an array of size n+1,in which 1593 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1594 bdiag[n]: points to entry of U(n-1,0)-1 1595 1596 U(i,:) contains bdiag[i] as its last entry, i.e., 1597 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1598 */ 1599 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1600 { 1601 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1602 PetscErrorCode ierr; 1603 const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1604 PetscInt i,j,k=0,nz,*bi,*bj,*bdiag; 1605 IS isicol; 1606 1607 PetscFunctionBegin; 1608 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1609 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 1610 b = (Mat_SeqAIJ*)(fact)->data; 1611 1612 /* allocate matrix arrays for new data structure */ 1613 ierr = PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);CHKERRQ(ierr); 1614 ierr = PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1615 1616 b->singlemalloc = PETSC_TRUE; 1617 if (!b->diag) { 1618 ierr = PetscMalloc1(n+1,&b->diag);CHKERRQ(ierr); 1619 ierr = PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1620 } 1621 bdiag = b->diag; 1622 1623 if (n > 0) { 1624 ierr = PetscArrayzero(b->a,ai[n]);CHKERRQ(ierr); 1625 } 1626 1627 /* set bi and bj with new data structure */ 1628 bi = b->i; 1629 bj = b->j; 1630 1631 /* L part */ 1632 bi[0] = 0; 1633 for (i=0; i<n; i++) { 1634 nz = adiag[i] - ai[i]; 1635 bi[i+1] = bi[i] + nz; 1636 aj = a->j + ai[i]; 1637 for (j=0; j<nz; j++) { 1638 /* *bj = aj[j]; bj++; */ 1639 bj[k++] = aj[j]; 1640 } 1641 } 1642 1643 /* U part */ 1644 bdiag[n] = bi[n]-1; 1645 for (i=n-1; i>=0; i--) { 1646 nz = ai[i+1] - adiag[i] - 1; 1647 aj = a->j + adiag[i] + 1; 1648 for (j=0; j<nz; j++) { 1649 /* *bj = aj[j]; bj++; */ 1650 bj[k++] = aj[j]; 1651 } 1652 /* diag[i] */ 1653 /* *bj = i; bj++; */ 1654 bj[k++] = i; 1655 bdiag[i] = bdiag[i+1] + nz + 1; 1656 } 1657 1658 fact->factortype = MAT_FACTOR_ILU; 1659 fact->info.factor_mallocs = 0; 1660 fact->info.fill_ratio_given = info->fill; 1661 fact->info.fill_ratio_needed = 1.0; 1662 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1663 ierr = MatSeqAIJCheckInode_FactorLU(fact);CHKERRQ(ierr); 1664 1665 b = (Mat_SeqAIJ*)(fact)->data; 1666 b->row = isrow; 1667 b->col = iscol; 1668 b->icol = isicol; 1669 ierr = PetscMalloc1(fact->rmap->n+1,&b->solve_work);CHKERRQ(ierr); 1670 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1671 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1672 PetscFunctionReturn(0); 1673 } 1674 1675 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1676 { 1677 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1678 IS isicol; 1679 PetscErrorCode ierr; 1680 const PetscInt *r,*ic; 1681 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1682 PetscInt *bi,*cols,nnz,*cols_lvl; 1683 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1684 PetscInt i,levels,diagonal_fill; 1685 PetscBool col_identity,row_identity,missing; 1686 PetscReal f; 1687 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 1688 PetscBT lnkbt; 1689 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1690 PetscFreeSpaceList free_space =NULL,current_space=NULL; 1691 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 1692 1693 PetscFunctionBegin; 1694 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1695 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1696 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1697 1698 levels = (PetscInt)info->levels; 1699 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1700 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1701 if (!levels && row_identity && col_identity) { 1702 /* special case: ilu(0) with natural ordering */ 1703 ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1704 if (a->inode.size) { 1705 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1706 } 1707 PetscFunctionReturn(0); 1708 } 1709 1710 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1711 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1712 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1713 1714 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1715 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 1716 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 1717 bi[0] = bdiag[0] = 0; 1718 ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr); 1719 1720 /* create a linked list for storing column indices of the active row */ 1721 nlnk = n + 1; 1722 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1723 1724 /* initial FreeSpace size is f*(ai[n]+1) */ 1725 f = info->fill; 1726 diagonal_fill = (PetscInt)info->diagonal_fill; 1727 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); 1728 current_space = free_space; 1729 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);CHKERRQ(ierr); 1730 current_space_lvl = free_space_lvl; 1731 for (i=0; i<n; i++) { 1732 nzi = 0; 1733 /* copy current row into linked list */ 1734 nnz = ai[r[i]+1] - ai[r[i]]; 1735 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1736 cols = aj + ai[r[i]]; 1737 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1738 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1739 nzi += nlnk; 1740 1741 /* make sure diagonal entry is included */ 1742 if (diagonal_fill && lnk[i] == -1) { 1743 fm = n; 1744 while (lnk[fm] < i) fm = lnk[fm]; 1745 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1746 lnk[fm] = i; 1747 lnk_lvl[i] = 0; 1748 nzi++; dcount++; 1749 } 1750 1751 /* add pivot rows into the active row */ 1752 nzbd = 0; 1753 prow = lnk[n]; 1754 while (prow < i) { 1755 nnz = bdiag[prow]; 1756 cols = bj_ptr[prow] + nnz + 1; 1757 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1758 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1759 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1760 nzi += nlnk; 1761 prow = lnk[prow]; 1762 nzbd++; 1763 } 1764 bdiag[i] = nzbd; 1765 bi[i+1] = bi[i] + nzi; 1766 /* if free space is not available, make more free space */ 1767 if (current_space->local_remaining<nzi) { 1768 nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(nzi,n - i)); /* estimated and max additional space needed */ 1769 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1770 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1771 reallocs++; 1772 } 1773 1774 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1775 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1776 bj_ptr[i] = current_space->array; 1777 bjlvl_ptr[i] = current_space_lvl->array; 1778 1779 /* make sure the active row i has diagonal entry */ 1780 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1781 1782 current_space->array += nzi; 1783 current_space->local_used += nzi; 1784 current_space->local_remaining -= nzi; 1785 current_space_lvl->array += nzi; 1786 current_space_lvl->local_used += nzi; 1787 current_space_lvl->local_remaining -= nzi; 1788 } 1789 1790 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1791 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1792 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1793 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 1794 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 1795 1796 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1797 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1798 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 1799 1800 #if defined(PETSC_USE_INFO) 1801 { 1802 PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1803 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 1804 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 1805 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr); 1806 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1807 if (diagonal_fill) { 1808 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr); 1809 } 1810 } 1811 #endif 1812 /* put together the new matrix */ 1813 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 1814 ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr); 1815 b = (Mat_SeqAIJ*)(fact)->data; 1816 1817 b->free_a = PETSC_TRUE; 1818 b->free_ij = PETSC_TRUE; 1819 b->singlemalloc = PETSC_FALSE; 1820 1821 ierr = PetscMalloc1(bdiag[0]+1,&b->a);CHKERRQ(ierr); 1822 1823 b->j = bj; 1824 b->i = bi; 1825 b->diag = bdiag; 1826 b->ilen = 0; 1827 b->imax = 0; 1828 b->row = isrow; 1829 b->col = iscol; 1830 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1831 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1832 b->icol = isicol; 1833 1834 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 1835 /* In b structure: Free imax, ilen, old a, old j. 1836 Allocate bdiag, solve_work, new a, new j */ 1837 ierr = PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1838 b->maxnz = b->nz = bdiag[0]+1; 1839 1840 (fact)->info.factor_mallocs = reallocs; 1841 (fact)->info.fill_ratio_given = f; 1842 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1843 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1844 if (a->inode.size) { 1845 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1846 } 1847 ierr = MatSeqAIJCheckInode_FactorLU(fact);CHKERRQ(ierr); 1848 PetscFunctionReturn(0); 1849 } 1850 1851 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1852 { 1853 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1854 IS isicol; 1855 PetscErrorCode ierr; 1856 const PetscInt *r,*ic; 1857 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1858 PetscInt *bi,*cols,nnz,*cols_lvl; 1859 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1860 PetscInt i,levels,diagonal_fill; 1861 PetscBool col_identity,row_identity; 1862 PetscReal f; 1863 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 1864 PetscBT lnkbt; 1865 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1866 PetscFreeSpaceList free_space =NULL,current_space=NULL; 1867 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 1868 PetscBool missing; 1869 1870 PetscFunctionBegin; 1871 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1872 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1873 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1874 1875 f = info->fill; 1876 levels = (PetscInt)info->levels; 1877 diagonal_fill = (PetscInt)info->diagonal_fill; 1878 1879 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1880 1881 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1882 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1883 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1884 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 1885 1886 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1887 if (a->inode.size) { 1888 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1889 } 1890 fact->factortype = MAT_FACTOR_ILU; 1891 (fact)->info.factor_mallocs = 0; 1892 (fact)->info.fill_ratio_given = info->fill; 1893 (fact)->info.fill_ratio_needed = 1.0; 1894 1895 b = (Mat_SeqAIJ*)(fact)->data; 1896 b->row = isrow; 1897 b->col = iscol; 1898 b->icol = isicol; 1899 ierr = PetscMalloc1((fact)->rmap->n+1,&b->solve_work);CHKERRQ(ierr); 1900 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1901 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1902 PetscFunctionReturn(0); 1903 } 1904 1905 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1906 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1907 1908 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1909 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 1910 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 1911 bi[0] = bdiag[0] = 0; 1912 1913 ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr); 1914 1915 /* create a linked list for storing column indices of the active row */ 1916 nlnk = n + 1; 1917 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1918 1919 /* initial FreeSpace size is f*(ai[n]+1) */ 1920 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); 1921 current_space = free_space; 1922 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);CHKERRQ(ierr); 1923 current_space_lvl = free_space_lvl; 1924 1925 for (i=0; i<n; i++) { 1926 nzi = 0; 1927 /* copy current row into linked list */ 1928 nnz = ai[r[i]+1] - ai[r[i]]; 1929 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1930 cols = aj + ai[r[i]]; 1931 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1932 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1933 nzi += nlnk; 1934 1935 /* make sure diagonal entry is included */ 1936 if (diagonal_fill && lnk[i] == -1) { 1937 fm = n; 1938 while (lnk[fm] < i) fm = lnk[fm]; 1939 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1940 lnk[fm] = i; 1941 lnk_lvl[i] = 0; 1942 nzi++; dcount++; 1943 } 1944 1945 /* add pivot rows into the active row */ 1946 nzbd = 0; 1947 prow = lnk[n]; 1948 while (prow < i) { 1949 nnz = bdiag[prow]; 1950 cols = bj_ptr[prow] + nnz + 1; 1951 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1952 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1953 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1954 nzi += nlnk; 1955 prow = lnk[prow]; 1956 nzbd++; 1957 } 1958 bdiag[i] = nzbd; 1959 bi[i+1] = bi[i] + nzi; 1960 1961 /* if free space is not available, make more free space */ 1962 if (current_space->local_remaining<nzi) { 1963 nnz = PetscIntMultTruncate(nzi,n - i); /* estimated and max additional space needed */ 1964 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1965 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1966 reallocs++; 1967 } 1968 1969 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1970 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1971 bj_ptr[i] = current_space->array; 1972 bjlvl_ptr[i] = current_space_lvl->array; 1973 1974 /* make sure the active row i has diagonal entry */ 1975 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1976 1977 current_space->array += nzi; 1978 current_space->local_used += nzi; 1979 current_space->local_remaining -= nzi; 1980 current_space_lvl->array += nzi; 1981 current_space_lvl->local_used += nzi; 1982 current_space_lvl->local_remaining -= nzi; 1983 } 1984 1985 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1986 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1987 1988 /* destroy list of free space and other temporary arrays */ 1989 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 1990 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */ 1991 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1992 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1993 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 1994 1995 #if defined(PETSC_USE_INFO) 1996 { 1997 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1998 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 1999 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2000 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr); 2001 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2002 if (diagonal_fill) { 2003 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr); 2004 } 2005 } 2006 #endif 2007 2008 /* put together the new matrix */ 2009 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 2010 ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr); 2011 b = (Mat_SeqAIJ*)(fact)->data; 2012 2013 b->free_a = PETSC_TRUE; 2014 b->free_ij = PETSC_TRUE; 2015 b->singlemalloc = PETSC_FALSE; 2016 2017 ierr = PetscMalloc1(bi[n],&b->a);CHKERRQ(ierr); 2018 b->j = bj; 2019 b->i = bi; 2020 for (i=0; i<n; i++) bdiag[i] += bi[i]; 2021 b->diag = bdiag; 2022 b->ilen = 0; 2023 b->imax = 0; 2024 b->row = isrow; 2025 b->col = iscol; 2026 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2027 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2028 b->icol = isicol; 2029 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 2030 /* In b structure: Free imax, ilen, old a, old j. 2031 Allocate bdiag, solve_work, new a, new j */ 2032 ierr = PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 2033 b->maxnz = b->nz = bi[n]; 2034 2035 (fact)->info.factor_mallocs = reallocs; 2036 (fact)->info.fill_ratio_given = f; 2037 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2038 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2039 if (a->inode.size) { 2040 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2041 } 2042 PetscFunctionReturn(0); 2043 } 2044 2045 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 2046 { 2047 Mat C = B; 2048 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2049 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2050 IS ip=b->row,iip = b->icol; 2051 PetscErrorCode ierr; 2052 const PetscInt *rip,*riip; 2053 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 2054 PetscInt *ai=a->i,*aj=a->j; 2055 PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 2056 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2057 PetscBool perm_identity; 2058 FactorShiftCtx sctx; 2059 PetscReal rs; 2060 MatScalar d,*v; 2061 2062 PetscFunctionBegin; 2063 /* MatPivotSetUp(): initialize shift context sctx */ 2064 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2065 2066 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2067 sctx.shift_top = info->zeropivot; 2068 for (i=0; i<mbs; i++) { 2069 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2070 d = (aa)[a->diag[i]]; 2071 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2072 v = aa+ai[i]; 2073 nz = ai[i+1] - ai[i]; 2074 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 2075 if (rs>sctx.shift_top) sctx.shift_top = rs; 2076 } 2077 sctx.shift_top *= 1.1; 2078 sctx.nshift_max = 5; 2079 sctx.shift_lo = 0.; 2080 sctx.shift_hi = 1.; 2081 } 2082 2083 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2084 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2085 2086 /* allocate working arrays 2087 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2088 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2089 */ 2090 ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);CHKERRQ(ierr); 2091 2092 do { 2093 sctx.newshift = PETSC_FALSE; 2094 2095 for (i=0; i<mbs; i++) c2r[i] = mbs; 2096 if (mbs) il[0] = 0; 2097 2098 for (k = 0; k<mbs; k++) { 2099 /* zero rtmp */ 2100 nz = bi[k+1] - bi[k]; 2101 bjtmp = bj + bi[k]; 2102 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2103 2104 /* load in initial unfactored row */ 2105 bval = ba + bi[k]; 2106 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2107 for (j = jmin; j < jmax; j++) { 2108 col = riip[aj[j]]; 2109 if (col >= k) { /* only take upper triangular entry */ 2110 rtmp[col] = aa[j]; 2111 *bval++ = 0.0; /* for in-place factorization */ 2112 } 2113 } 2114 /* shift the diagonal of the matrix: ZeropivotApply() */ 2115 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2116 2117 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2118 dk = rtmp[k]; 2119 i = c2r[k]; /* first row to be added to k_th row */ 2120 2121 while (i < k) { 2122 nexti = c2r[i]; /* next row to be added to k_th row */ 2123 2124 /* compute multiplier, update diag(k) and U(i,k) */ 2125 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2126 uikdi = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 2127 dk += uikdi*ba[ili]; /* update diag[k] */ 2128 ba[ili] = uikdi; /* -U(i,k) */ 2129 2130 /* add multiple of row i to k-th row */ 2131 jmin = ili + 1; jmax = bi[i+1]; 2132 if (jmin < jmax) { 2133 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2134 /* update il and c2r for row i */ 2135 il[i] = jmin; 2136 j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 2137 } 2138 i = nexti; 2139 } 2140 2141 /* copy data into U(k,:) */ 2142 rs = 0.0; 2143 jmin = bi[k]; jmax = bi[k+1]-1; 2144 if (jmin < jmax) { 2145 for (j=jmin; j<jmax; j++) { 2146 col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]); 2147 } 2148 /* add the k-th row into il and c2r */ 2149 il[k] = jmin; 2150 i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 2151 } 2152 2153 /* MatPivotCheck() */ 2154 sctx.rs = rs; 2155 sctx.pv = dk; 2156 ierr = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr); 2157 if (sctx.newshift) break; 2158 dk = sctx.pv; 2159 2160 ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 2161 } 2162 } while (sctx.newshift); 2163 2164 ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr); 2165 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2166 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2167 2168 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2169 if (perm_identity) { 2170 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2171 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2172 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2173 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2174 } else { 2175 B->ops->solve = MatSolve_SeqSBAIJ_1; 2176 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2177 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2178 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2179 } 2180 2181 C->assembled = PETSC_TRUE; 2182 C->preallocated = PETSC_TRUE; 2183 2184 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2185 2186 /* MatPivotView() */ 2187 if (sctx.nshift) { 2188 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2189 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 2190 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2191 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2192 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 2193 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr); 2194 } 2195 } 2196 PetscFunctionReturn(0); 2197 } 2198 2199 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 2200 { 2201 Mat C = B; 2202 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2203 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2204 IS ip=b->row,iip = b->icol; 2205 PetscErrorCode ierr; 2206 const PetscInt *rip,*riip; 2207 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp; 2208 PetscInt *ai=a->i,*aj=a->j; 2209 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 2210 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2211 PetscBool perm_identity; 2212 FactorShiftCtx sctx; 2213 PetscReal rs; 2214 MatScalar d,*v; 2215 2216 PetscFunctionBegin; 2217 /* MatPivotSetUp(): initialize shift context sctx */ 2218 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2219 2220 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2221 sctx.shift_top = info->zeropivot; 2222 for (i=0; i<mbs; i++) { 2223 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2224 d = (aa)[a->diag[i]]; 2225 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2226 v = aa+ai[i]; 2227 nz = ai[i+1] - ai[i]; 2228 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 2229 if (rs>sctx.shift_top) sctx.shift_top = rs; 2230 } 2231 sctx.shift_top *= 1.1; 2232 sctx.nshift_max = 5; 2233 sctx.shift_lo = 0.; 2234 sctx.shift_hi = 1.; 2235 } 2236 2237 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2238 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2239 2240 /* initialization */ 2241 ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);CHKERRQ(ierr); 2242 2243 do { 2244 sctx.newshift = PETSC_FALSE; 2245 2246 for (i=0; i<mbs; i++) jl[i] = mbs; 2247 il[0] = 0; 2248 2249 for (k = 0; k<mbs; k++) { 2250 /* zero rtmp */ 2251 nz = bi[k+1] - bi[k]; 2252 bjtmp = bj + bi[k]; 2253 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2254 2255 bval = ba + bi[k]; 2256 /* initialize k-th row by the perm[k]-th row of A */ 2257 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2258 for (j = jmin; j < jmax; j++) { 2259 col = riip[aj[j]]; 2260 if (col >= k) { /* only take upper triangular entry */ 2261 rtmp[col] = aa[j]; 2262 *bval++ = 0.0; /* for in-place factorization */ 2263 } 2264 } 2265 /* shift the diagonal of the matrix */ 2266 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2267 2268 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2269 dk = rtmp[k]; 2270 i = jl[k]; /* first row to be added to k_th row */ 2271 2272 while (i < k) { 2273 nexti = jl[i]; /* next row to be added to k_th row */ 2274 2275 /* compute multiplier, update diag(k) and U(i,k) */ 2276 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2277 uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */ 2278 dk += uikdi*ba[ili]; 2279 ba[ili] = uikdi; /* -U(i,k) */ 2280 2281 /* add multiple of row i to k-th row */ 2282 jmin = ili + 1; jmax = bi[i+1]; 2283 if (jmin < jmax) { 2284 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2285 /* update il and jl for row i */ 2286 il[i] = jmin; 2287 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 2288 } 2289 i = nexti; 2290 } 2291 2292 /* shift the diagonals when zero pivot is detected */ 2293 /* compute rs=sum of abs(off-diagonal) */ 2294 rs = 0.0; 2295 jmin = bi[k]+1; 2296 nz = bi[k+1] - jmin; 2297 bcol = bj + jmin; 2298 for (j=0; j<nz; j++) { 2299 rs += PetscAbsScalar(rtmp[bcol[j]]); 2300 } 2301 2302 sctx.rs = rs; 2303 sctx.pv = dk; 2304 ierr = MatPivotCheck(B,A,info,&sctx,k);CHKERRQ(ierr); 2305 if (sctx.newshift) break; 2306 dk = sctx.pv; 2307 2308 /* copy data into U(k,:) */ 2309 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 2310 jmin = bi[k]+1; jmax = bi[k+1]; 2311 if (jmin < jmax) { 2312 for (j=jmin; j<jmax; j++) { 2313 col = bj[j]; ba[j] = rtmp[col]; 2314 } 2315 /* add the k-th row into il and jl */ 2316 il[k] = jmin; 2317 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 2318 } 2319 } 2320 } while (sctx.newshift); 2321 2322 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 2323 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2324 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2325 2326 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2327 if (perm_identity) { 2328 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2329 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2330 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2331 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2332 } else { 2333 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2334 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2335 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2336 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2337 } 2338 2339 C->assembled = PETSC_TRUE; 2340 C->preallocated = PETSC_TRUE; 2341 2342 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2343 if (sctx.nshift) { 2344 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2345 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2346 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2347 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2348 } 2349 } 2350 PetscFunctionReturn(0); 2351 } 2352 2353 /* 2354 icc() under revised new data structure. 2355 Factored arrays bj and ba are stored as 2356 U(0,:),...,U(i,:),U(n-1,:) 2357 2358 ui=fact->i is an array of size n+1, in which 2359 ui+ 2360 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2361 ui[n]: points to U(n-1,n-1)+1 2362 2363 udiag=fact->diag is an array of size n,in which 2364 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2365 2366 U(i,:) contains udiag[i] as its last entry, i.e., 2367 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2368 */ 2369 2370 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2371 { 2372 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2373 Mat_SeqSBAIJ *b; 2374 PetscErrorCode ierr; 2375 PetscBool perm_identity,missing; 2376 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2377 const PetscInt *rip,*riip; 2378 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2379 PetscInt nlnk,*lnk,*lnk_lvl=NULL,d; 2380 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2381 PetscReal fill =info->fill,levels=info->levels; 2382 PetscFreeSpaceList free_space =NULL,current_space=NULL; 2383 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 2384 PetscBT lnkbt; 2385 IS iperm; 2386 2387 PetscFunctionBegin; 2388 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2389 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2390 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2391 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2392 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2393 2394 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 2395 ierr = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr); 2396 ui[0] = 0; 2397 2398 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2399 if (!levels && perm_identity) { 2400 for (i=0; i<am; i++) { 2401 ncols = ai[i+1] - a->diag[i]; 2402 ui[i+1] = ui[i] + ncols; 2403 udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 2404 } 2405 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2406 cols = uj; 2407 for (i=0; i<am; i++) { 2408 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2409 ncols = ai[i+1] - a->diag[i] -1; 2410 for (j=0; j<ncols; j++) *cols++ = aj[j]; 2411 *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */ 2412 } 2413 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2414 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2415 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2416 2417 /* initialization */ 2418 ierr = PetscMalloc1(am+1,&ajtmp);CHKERRQ(ierr); 2419 2420 /* jl: linked list for storing indices of the pivot rows 2421 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2422 ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr); 2423 for (i=0; i<am; i++) { 2424 jl[i] = am; il[i] = 0; 2425 } 2426 2427 /* create and initialize a linked list for storing column indices of the active row k */ 2428 nlnk = am + 1; 2429 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2430 2431 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2432 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2433 current_space = free_space; 2434 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr); 2435 current_space_lvl = free_space_lvl; 2436 2437 for (k=0; k<am; k++) { /* for each active row k */ 2438 /* initialize lnk by the column indices of row rip[k] of A */ 2439 nzk = 0; 2440 ncols = ai[rip[k]+1] - ai[rip[k]]; 2441 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2442 ncols_upper = 0; 2443 for (j=0; j<ncols; j++) { 2444 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2445 if (riip[i] >= k) { /* only take upper triangular entry */ 2446 ajtmp[ncols_upper] = i; 2447 ncols_upper++; 2448 } 2449 } 2450 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2451 nzk += nlnk; 2452 2453 /* update lnk by computing fill-in for each pivot row to be merged in */ 2454 prow = jl[k]; /* 1st pivot row */ 2455 2456 while (prow < k) { 2457 nextprow = jl[prow]; 2458 2459 /* merge prow into k-th row */ 2460 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2461 jmax = ui[prow+1]; 2462 ncols = jmax-jmin; 2463 i = jmin - ui[prow]; 2464 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2465 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2466 j = *(uj - 1); 2467 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2468 nzk += nlnk; 2469 2470 /* update il and jl for prow */ 2471 if (jmin < jmax) { 2472 il[prow] = jmin; 2473 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2474 } 2475 prow = nextprow; 2476 } 2477 2478 /* if free space is not available, make more free space */ 2479 if (current_space->local_remaining<nzk) { 2480 i = am - k + 1; /* num of unfactored rows */ 2481 i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2482 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2483 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2484 reallocs++; 2485 } 2486 2487 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2488 if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2489 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2490 2491 /* add the k-th row into il and jl */ 2492 if (nzk > 1) { 2493 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2494 jl[k] = jl[i]; jl[i] = k; 2495 il[k] = ui[k] + 1; 2496 } 2497 uj_ptr[k] = current_space->array; 2498 uj_lvl_ptr[k] = current_space_lvl->array; 2499 2500 current_space->array += nzk; 2501 current_space->local_used += nzk; 2502 current_space->local_remaining -= nzk; 2503 2504 current_space_lvl->array += nzk; 2505 current_space_lvl->local_used += nzk; 2506 current_space_lvl->local_remaining -= nzk; 2507 2508 ui[k+1] = ui[k] + nzk; 2509 } 2510 2511 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2512 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2513 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2514 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2515 2516 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2517 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2518 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2519 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2520 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2521 2522 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2523 2524 /* put together the new matrix in MATSEQSBAIJ format */ 2525 b = (Mat_SeqSBAIJ*)(fact)->data; 2526 b->singlemalloc = PETSC_FALSE; 2527 2528 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 2529 2530 b->j = uj; 2531 b->i = ui; 2532 b->diag = udiag; 2533 b->free_diag = PETSC_TRUE; 2534 b->ilen = 0; 2535 b->imax = 0; 2536 b->row = perm; 2537 b->col = perm; 2538 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2539 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2540 b->icol = iperm; 2541 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2542 2543 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 2544 ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2545 2546 b->maxnz = b->nz = ui[am]; 2547 b->free_a = PETSC_TRUE; 2548 b->free_ij = PETSC_TRUE; 2549 2550 fact->info.factor_mallocs = reallocs; 2551 fact->info.fill_ratio_given = fill; 2552 if (ai[am] != 0) { 2553 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2554 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2555 } else { 2556 fact->info.fill_ratio_needed = 0.0; 2557 } 2558 #if defined(PETSC_USE_INFO) 2559 if (ai[am] != 0) { 2560 PetscReal af = fact->info.fill_ratio_needed; 2561 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2562 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2563 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2564 } else { 2565 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 2566 } 2567 #endif 2568 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2569 PetscFunctionReturn(0); 2570 } 2571 2572 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2573 { 2574 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2575 Mat_SeqSBAIJ *b; 2576 PetscErrorCode ierr; 2577 PetscBool perm_identity,missing; 2578 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2579 const PetscInt *rip,*riip; 2580 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2581 PetscInt nlnk,*lnk,*lnk_lvl=NULL,d; 2582 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2583 PetscReal fill =info->fill,levels=info->levels; 2584 PetscFreeSpaceList free_space =NULL,current_space=NULL; 2585 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 2586 PetscBT lnkbt; 2587 IS iperm; 2588 2589 PetscFunctionBegin; 2590 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2591 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2592 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2593 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2594 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2595 2596 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 2597 ierr = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr); 2598 ui[0] = 0; 2599 2600 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2601 if (!levels && perm_identity) { 2602 2603 for (i=0; i<am; i++) { 2604 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 2605 udiag[i] = ui[i]; 2606 } 2607 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2608 cols = uj; 2609 for (i=0; i<am; i++) { 2610 aj = a->j + a->diag[i]; 2611 ncols = ui[i+1] - ui[i]; 2612 for (j=0; j<ncols; j++) *cols++ = *aj++; 2613 } 2614 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2615 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2616 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2617 2618 /* initialization */ 2619 ierr = PetscMalloc1(am+1,&ajtmp);CHKERRQ(ierr); 2620 2621 /* jl: linked list for storing indices of the pivot rows 2622 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2623 ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr); 2624 for (i=0; i<am; i++) { 2625 jl[i] = am; il[i] = 0; 2626 } 2627 2628 /* create and initialize a linked list for storing column indices of the active row k */ 2629 nlnk = am + 1; 2630 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2631 2632 /* initial FreeSpace size is fill*(ai[am]+1) */ 2633 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);CHKERRQ(ierr); 2634 current_space = free_space; 2635 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space_lvl);CHKERRQ(ierr); 2636 current_space_lvl = free_space_lvl; 2637 2638 for (k=0; k<am; k++) { /* for each active row k */ 2639 /* initialize lnk by the column indices of row rip[k] of A */ 2640 nzk = 0; 2641 ncols = ai[rip[k]+1] - ai[rip[k]]; 2642 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2643 ncols_upper = 0; 2644 for (j=0; j<ncols; j++) { 2645 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2646 if (riip[i] >= k) { /* only take upper triangular entry */ 2647 ajtmp[ncols_upper] = i; 2648 ncols_upper++; 2649 } 2650 } 2651 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2652 nzk += nlnk; 2653 2654 /* update lnk by computing fill-in for each pivot row to be merged in */ 2655 prow = jl[k]; /* 1st pivot row */ 2656 2657 while (prow < k) { 2658 nextprow = jl[prow]; 2659 2660 /* merge prow into k-th row */ 2661 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2662 jmax = ui[prow+1]; 2663 ncols = jmax-jmin; 2664 i = jmin - ui[prow]; 2665 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2666 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2667 j = *(uj - 1); 2668 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2669 nzk += nlnk; 2670 2671 /* update il and jl for prow */ 2672 if (jmin < jmax) { 2673 il[prow] = jmin; 2674 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2675 } 2676 prow = nextprow; 2677 } 2678 2679 /* if free space is not available, make more free space */ 2680 if (current_space->local_remaining<nzk) { 2681 i = am - k + 1; /* num of unfactored rows */ 2682 i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2683 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2684 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2685 reallocs++; 2686 } 2687 2688 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2689 if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2690 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2691 2692 /* add the k-th row into il and jl */ 2693 if (nzk > 1) { 2694 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2695 jl[k] = jl[i]; jl[i] = k; 2696 il[k] = ui[k] + 1; 2697 } 2698 uj_ptr[k] = current_space->array; 2699 uj_lvl_ptr[k] = current_space_lvl->array; 2700 2701 current_space->array += nzk; 2702 current_space->local_used += nzk; 2703 current_space->local_remaining -= nzk; 2704 2705 current_space_lvl->array += nzk; 2706 current_space_lvl->local_used += nzk; 2707 current_space_lvl->local_remaining -= nzk; 2708 2709 ui[k+1] = ui[k] + nzk; 2710 } 2711 2712 #if defined(PETSC_USE_INFO) 2713 if (ai[am] != 0) { 2714 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2715 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2716 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2717 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2718 } else { 2719 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 2720 } 2721 #endif 2722 2723 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2724 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2725 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2726 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2727 2728 /* destroy list of free space and other temporary array(s) */ 2729 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2730 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2731 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2732 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2733 2734 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2735 2736 /* put together the new matrix in MATSEQSBAIJ format */ 2737 2738 b = (Mat_SeqSBAIJ*)fact->data; 2739 b->singlemalloc = PETSC_FALSE; 2740 2741 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 2742 2743 b->j = uj; 2744 b->i = ui; 2745 b->diag = udiag; 2746 b->free_diag = PETSC_TRUE; 2747 b->ilen = 0; 2748 b->imax = 0; 2749 b->row = perm; 2750 b->col = perm; 2751 2752 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2753 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2754 2755 b->icol = iperm; 2756 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2757 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 2758 ierr = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2759 b->maxnz = b->nz = ui[am]; 2760 b->free_a = PETSC_TRUE; 2761 b->free_ij = PETSC_TRUE; 2762 2763 fact->info.factor_mallocs = reallocs; 2764 fact->info.fill_ratio_given = fill; 2765 if (ai[am] != 0) { 2766 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2767 } else { 2768 fact->info.fill_ratio_needed = 0.0; 2769 } 2770 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2771 PetscFunctionReturn(0); 2772 } 2773 2774 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2775 { 2776 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2777 Mat_SeqSBAIJ *b; 2778 PetscErrorCode ierr; 2779 PetscBool perm_identity,missing; 2780 PetscReal fill = info->fill; 2781 const PetscInt *rip,*riip; 2782 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2783 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2784 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag; 2785 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2786 PetscBT lnkbt; 2787 IS iperm; 2788 2789 PetscFunctionBegin; 2790 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2791 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 2792 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 2793 2794 /* check whether perm is the identity mapping */ 2795 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2796 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2797 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2798 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2799 2800 /* initialization */ 2801 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 2802 ierr = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr); 2803 ui[0] = 0; 2804 2805 /* jl: linked list for storing indices of the pivot rows 2806 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2807 ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr); 2808 for (i=0; i<am; i++) { 2809 jl[i] = am; il[i] = 0; 2810 } 2811 2812 /* create and initialize a linked list for storing column indices of the active row k */ 2813 nlnk = am + 1; 2814 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2815 2816 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2817 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2818 current_space = free_space; 2819 2820 for (k=0; k<am; k++) { /* for each active row k */ 2821 /* initialize lnk by the column indices of row rip[k] of A */ 2822 nzk = 0; 2823 ncols = ai[rip[k]+1] - ai[rip[k]]; 2824 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2825 ncols_upper = 0; 2826 for (j=0; j<ncols; j++) { 2827 i = riip[*(aj + ai[rip[k]] + j)]; 2828 if (i >= k) { /* only take upper triangular entry */ 2829 cols[ncols_upper] = i; 2830 ncols_upper++; 2831 } 2832 } 2833 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2834 nzk += nlnk; 2835 2836 /* update lnk by computing fill-in for each pivot row to be merged in */ 2837 prow = jl[k]; /* 1st pivot row */ 2838 2839 while (prow < k) { 2840 nextprow = jl[prow]; 2841 /* merge prow into k-th row */ 2842 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2843 jmax = ui[prow+1]; 2844 ncols = jmax-jmin; 2845 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2846 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2847 nzk += nlnk; 2848 2849 /* update il and jl for prow */ 2850 if (jmin < jmax) { 2851 il[prow] = jmin; 2852 j = *uj_ptr; 2853 jl[prow] = jl[j]; 2854 jl[j] = prow; 2855 } 2856 prow = nextprow; 2857 } 2858 2859 /* if free space is not available, make more free space */ 2860 if (current_space->local_remaining<nzk) { 2861 i = am - k + 1; /* num of unfactored rows */ 2862 i = PetscIntMultTruncate(i,PetscMin(nzk,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2863 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2864 reallocs++; 2865 } 2866 2867 /* copy data into free space, then initialize lnk */ 2868 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 2869 2870 /* add the k-th row into il and jl */ 2871 if (nzk > 1) { 2872 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2873 jl[k] = jl[i]; jl[i] = k; 2874 il[k] = ui[k] + 1; 2875 } 2876 ui_ptr[k] = current_space->array; 2877 2878 current_space->array += nzk; 2879 current_space->local_used += nzk; 2880 current_space->local_remaining -= nzk; 2881 2882 ui[k+1] = ui[k] + nzk; 2883 } 2884 2885 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2886 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2887 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 2888 2889 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2890 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2891 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2892 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2893 2894 /* put together the new matrix in MATSEQSBAIJ format */ 2895 2896 b = (Mat_SeqSBAIJ*)fact->data; 2897 b->singlemalloc = PETSC_FALSE; 2898 b->free_a = PETSC_TRUE; 2899 b->free_ij = PETSC_TRUE; 2900 2901 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 2902 2903 b->j = uj; 2904 b->i = ui; 2905 b->diag = udiag; 2906 b->free_diag = PETSC_TRUE; 2907 b->ilen = 0; 2908 b->imax = 0; 2909 b->row = perm; 2910 b->col = perm; 2911 2912 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2913 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2914 2915 b->icol = iperm; 2916 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2917 2918 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 2919 ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2920 2921 b->maxnz = b->nz = ui[am]; 2922 2923 fact->info.factor_mallocs = reallocs; 2924 fact->info.fill_ratio_given = fill; 2925 if (ai[am] != 0) { 2926 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2927 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2928 } else { 2929 fact->info.fill_ratio_needed = 0.0; 2930 } 2931 #if defined(PETSC_USE_INFO) 2932 if (ai[am] != 0) { 2933 PetscReal af = fact->info.fill_ratio_needed; 2934 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2935 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2936 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2937 } else { 2938 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 2939 } 2940 #endif 2941 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2942 PetscFunctionReturn(0); 2943 } 2944 2945 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2946 { 2947 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2948 Mat_SeqSBAIJ *b; 2949 PetscErrorCode ierr; 2950 PetscBool perm_identity,missing; 2951 PetscReal fill = info->fill; 2952 const PetscInt *rip,*riip; 2953 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2954 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2955 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 2956 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2957 PetscBT lnkbt; 2958 IS iperm; 2959 2960 PetscFunctionBegin; 2961 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2962 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 2963 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 2964 2965 /* check whether perm is the identity mapping */ 2966 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2967 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2968 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2969 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2970 2971 /* initialization */ 2972 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 2973 ui[0] = 0; 2974 2975 /* jl: linked list for storing indices of the pivot rows 2976 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2977 ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr); 2978 for (i=0; i<am; i++) { 2979 jl[i] = am; il[i] = 0; 2980 } 2981 2982 /* create and initialize a linked list for storing column indices of the active row k */ 2983 nlnk = am + 1; 2984 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2985 2986 /* initial FreeSpace size is fill*(ai[am]+1) */ 2987 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);CHKERRQ(ierr); 2988 current_space = free_space; 2989 2990 for (k=0; k<am; k++) { /* for each active row k */ 2991 /* initialize lnk by the column indices of row rip[k] of A */ 2992 nzk = 0; 2993 ncols = ai[rip[k]+1] - ai[rip[k]]; 2994 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2995 ncols_upper = 0; 2996 for (j=0; j<ncols; j++) { 2997 i = riip[*(aj + ai[rip[k]] + j)]; 2998 if (i >= k) { /* only take upper triangular entry */ 2999 cols[ncols_upper] = i; 3000 ncols_upper++; 3001 } 3002 } 3003 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3004 nzk += nlnk; 3005 3006 /* update lnk by computing fill-in for each pivot row to be merged in */ 3007 prow = jl[k]; /* 1st pivot row */ 3008 3009 while (prow < k) { 3010 nextprow = jl[prow]; 3011 /* merge prow into k-th row */ 3012 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3013 jmax = ui[prow+1]; 3014 ncols = jmax-jmin; 3015 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3016 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3017 nzk += nlnk; 3018 3019 /* update il and jl for prow */ 3020 if (jmin < jmax) { 3021 il[prow] = jmin; 3022 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 3023 } 3024 prow = nextprow; 3025 } 3026 3027 /* if free space is not available, make more free space */ 3028 if (current_space->local_remaining<nzk) { 3029 i = am - k + 1; /* num of unfactored rows */ 3030 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3031 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 3032 reallocs++; 3033 } 3034 3035 /* copy data into free space, then initialize lnk */ 3036 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3037 3038 /* add the k-th row into il and jl */ 3039 if (nzk-1 > 0) { 3040 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3041 jl[k] = jl[i]; jl[i] = k; 3042 il[k] = ui[k] + 1; 3043 } 3044 ui_ptr[k] = current_space->array; 3045 3046 current_space->array += nzk; 3047 current_space->local_used += nzk; 3048 current_space->local_remaining -= nzk; 3049 3050 ui[k+1] = ui[k] + nzk; 3051 } 3052 3053 #if defined(PETSC_USE_INFO) 3054 if (ai[am] != 0) { 3055 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 3056 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 3057 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 3058 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 3059 } else { 3060 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 3061 } 3062 #endif 3063 3064 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 3065 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 3066 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 3067 3068 /* destroy list of free space and other temporary array(s) */ 3069 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 3070 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 3071 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3072 3073 /* put together the new matrix in MATSEQSBAIJ format */ 3074 3075 b = (Mat_SeqSBAIJ*)fact->data; 3076 b->singlemalloc = PETSC_FALSE; 3077 b->free_a = PETSC_TRUE; 3078 b->free_ij = PETSC_TRUE; 3079 3080 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 3081 3082 b->j = uj; 3083 b->i = ui; 3084 b->diag = 0; 3085 b->ilen = 0; 3086 b->imax = 0; 3087 b->row = perm; 3088 b->col = perm; 3089 3090 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3091 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3092 3093 b->icol = iperm; 3094 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3095 3096 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 3097 ierr = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3098 b->maxnz = b->nz = ui[am]; 3099 3100 fact->info.factor_mallocs = reallocs; 3101 fact->info.fill_ratio_given = fill; 3102 if (ai[am] != 0) { 3103 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 3104 } else { 3105 fact->info.fill_ratio_needed = 0.0; 3106 } 3107 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3108 PetscFunctionReturn(0); 3109 } 3110 3111 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 3112 { 3113 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3114 PetscErrorCode ierr; 3115 PetscInt n = A->rmap->n; 3116 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 3117 PetscScalar *x,sum; 3118 const PetscScalar *b; 3119 const MatScalar *aa = a->a,*v; 3120 PetscInt i,nz; 3121 3122 PetscFunctionBegin; 3123 if (!n) PetscFunctionReturn(0); 3124 3125 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3126 ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr); 3127 3128 /* forward solve the lower triangular */ 3129 x[0] = b[0]; 3130 v = aa; 3131 vi = aj; 3132 for (i=1; i<n; i++) { 3133 nz = ai[i+1] - ai[i]; 3134 sum = b[i]; 3135 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3136 v += nz; 3137 vi += nz; 3138 x[i] = sum; 3139 } 3140 3141 /* backward solve the upper triangular */ 3142 for (i=n-1; i>=0; i--) { 3143 v = aa + adiag[i+1] + 1; 3144 vi = aj + adiag[i+1] + 1; 3145 nz = adiag[i] - adiag[i+1]-1; 3146 sum = x[i]; 3147 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3148 x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3149 } 3150 3151 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 3152 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3153 ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr); 3154 PetscFunctionReturn(0); 3155 } 3156 3157 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 3158 { 3159 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3160 IS iscol = a->col,isrow = a->row; 3161 PetscErrorCode ierr; 3162 PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz; 3163 const PetscInt *rout,*cout,*r,*c; 3164 PetscScalar *x,*tmp,sum; 3165 const PetscScalar *b; 3166 const MatScalar *aa = a->a,*v; 3167 3168 PetscFunctionBegin; 3169 if (!n) PetscFunctionReturn(0); 3170 3171 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3172 ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr); 3173 tmp = a->solve_work; 3174 3175 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 3176 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 3177 3178 /* forward solve the lower triangular */ 3179 tmp[0] = b[r[0]]; 3180 v = aa; 3181 vi = aj; 3182 for (i=1; i<n; i++) { 3183 nz = ai[i+1] - ai[i]; 3184 sum = b[r[i]]; 3185 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3186 tmp[i] = sum; 3187 v += nz; vi += nz; 3188 } 3189 3190 /* backward solve the upper triangular */ 3191 for (i=n-1; i>=0; i--) { 3192 v = aa + adiag[i+1]+1; 3193 vi = aj + adiag[i+1]+1; 3194 nz = adiag[i]-adiag[i+1]-1; 3195 sum = tmp[i]; 3196 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3197 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 3198 } 3199 3200 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 3201 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 3202 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3203 ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr); 3204 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 3205 PetscFunctionReturn(0); 3206 } 3207 3208 /* 3209 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3210 */ 3211 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 3212 { 3213 Mat B = *fact; 3214 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 3215 IS isicol; 3216 PetscErrorCode ierr; 3217 const PetscInt *r,*ic; 3218 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 3219 PetscInt *bi,*bj,*bdiag,*bdiag_rev; 3220 PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au; 3221 PetscInt nlnk,*lnk; 3222 PetscBT lnkbt; 3223 PetscBool row_identity,icol_identity; 3224 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp; 3225 const PetscInt *ics; 3226 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 3227 PetscReal dt =info->dt,shift=info->shiftamount; 3228 PetscInt dtcount=(PetscInt)info->dtcount,nnz_max; 3229 PetscBool missing; 3230 3231 PetscFunctionBegin; 3232 if (dt == PETSC_DEFAULT) dt = 0.005; 3233 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); 3234 3235 /* ------- symbolic factorization, can be reused ---------*/ 3236 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 3237 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 3238 adiag=a->diag; 3239 3240 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 3241 3242 /* bdiag is location of diagonal in factor */ 3243 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); /* becomes b->diag */ 3244 ierr = PetscMalloc1(n+1,&bdiag_rev);CHKERRQ(ierr); /* temporary */ 3245 3246 /* allocate row pointers bi */ 3247 ierr = PetscMalloc1(2*n+2,&bi);CHKERRQ(ierr); 3248 3249 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3250 if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */ 3251 nnz_max = ai[n]+2*n*dtcount+2; 3252 3253 ierr = PetscMalloc1(nnz_max+1,&bj);CHKERRQ(ierr); 3254 ierr = PetscMalloc1(nnz_max+1,&ba);CHKERRQ(ierr); 3255 3256 /* put together the new matrix */ 3257 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 3258 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 3259 b = (Mat_SeqAIJ*)B->data; 3260 3261 b->free_a = PETSC_TRUE; 3262 b->free_ij = PETSC_TRUE; 3263 b->singlemalloc = PETSC_FALSE; 3264 3265 b->a = ba; 3266 b->j = bj; 3267 b->i = bi; 3268 b->diag = bdiag; 3269 b->ilen = 0; 3270 b->imax = 0; 3271 b->row = isrow; 3272 b->col = iscol; 3273 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3274 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3275 b->icol = isicol; 3276 3277 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 3278 ierr = PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3279 b->maxnz = nnz_max; 3280 3281 B->factortype = MAT_FACTOR_ILUDT; 3282 B->info.factor_mallocs = 0; 3283 B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 3284 /* ------- end of symbolic factorization ---------*/ 3285 3286 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3287 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3288 ics = ic; 3289 3290 /* linked list for storing column indices of the active row */ 3291 nlnk = n + 1; 3292 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3293 3294 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3295 ierr = PetscMalloc2(n,&im,n,&jtmp);CHKERRQ(ierr); 3296 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3297 ierr = PetscMalloc2(n,&rtmp,n,&vtmp);CHKERRQ(ierr); 3298 ierr = PetscArrayzero(rtmp,n);CHKERRQ(ierr); 3299 3300 bi[0] = 0; 3301 bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */ 3302 bdiag_rev[n] = bdiag[0]; 3303 bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */ 3304 for (i=0; i<n; i++) { 3305 /* copy initial fill into linked list */ 3306 nzi = ai[r[i]+1] - ai[r[i]]; 3307 if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 3308 nzi_al = adiag[r[i]] - ai[r[i]]; 3309 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 3310 ajtmp = aj + ai[r[i]]; 3311 ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3312 3313 /* load in initial (unfactored row) */ 3314 aatmp = a->a + ai[r[i]]; 3315 for (j=0; j<nzi; j++) { 3316 rtmp[ics[*ajtmp++]] = *aatmp++; 3317 } 3318 3319 /* add pivot rows into linked list */ 3320 row = lnk[n]; 3321 while (row < i) { 3322 nzi_bl = bi[row+1] - bi[row] + 1; 3323 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 3324 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 3325 nzi += nlnk; 3326 row = lnk[row]; 3327 } 3328 3329 /* copy data from lnk into jtmp, then initialize lnk */ 3330 ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 3331 3332 /* numerical factorization */ 3333 bjtmp = jtmp; 3334 row = *bjtmp++; /* 1st pivot row */ 3335 while (row < i) { 3336 pc = rtmp + row; 3337 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3338 multiplier = (*pc) * (*pv); 3339 *pc = multiplier; 3340 if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 3341 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3342 pv = ba + bdiag[row+1] + 1; 3343 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3344 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3345 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3346 } 3347 row = *bjtmp++; 3348 } 3349 3350 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3351 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3352 nzi_bl = 0; j = 0; 3353 while (jtmp[j] < i) { /* Note: jtmp is sorted */ 3354 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3355 nzi_bl++; j++; 3356 } 3357 nzi_bu = nzi - nzi_bl -1; 3358 while (j < nzi) { 3359 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3360 j++; 3361 } 3362 3363 bjtmp = bj + bi[i]; 3364 batmp = ba + bi[i]; 3365 /* apply level dropping rule to L part */ 3366 ncut = nzi_al + dtcount; 3367 if (ncut < nzi_bl) { 3368 ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 3369 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 3370 } else { 3371 ncut = nzi_bl; 3372 } 3373 for (j=0; j<ncut; j++) { 3374 bjtmp[j] = jtmp[j]; 3375 batmp[j] = vtmp[j]; 3376 } 3377 bi[i+1] = bi[i] + ncut; 3378 nzi = ncut + 1; 3379 3380 /* apply level dropping rule to U part */ 3381 ncut = nzi_au + dtcount; 3382 if (ncut < nzi_bu) { 3383 ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 3384 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 3385 } else { 3386 ncut = nzi_bu; 3387 } 3388 nzi += ncut; 3389 3390 /* mark bdiagonal */ 3391 bdiag[i+1] = bdiag[i] - (ncut + 1); 3392 bdiag_rev[n-i-1] = bdiag[i+1]; 3393 bi[2*n - i] = bi[2*n - i +1] - (ncut + 1); 3394 bjtmp = bj + bdiag[i]; 3395 batmp = ba + bdiag[i]; 3396 *bjtmp = i; 3397 *batmp = diag_tmp; /* rtmp[i]; */ 3398 if (*batmp == 0.0) { 3399 *batmp = dt+shift; 3400 } 3401 *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */ 3402 3403 bjtmp = bj + bdiag[i+1]+1; 3404 batmp = ba + bdiag[i+1]+1; 3405 for (k=0; k<ncut; k++) { 3406 bjtmp[k] = jtmp[nzi_bl+1+k]; 3407 batmp[k] = vtmp[nzi_bl+1+k]; 3408 } 3409 3410 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3411 } /* for (i=0; i<n; i++) */ 3412 if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 3413 3414 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3415 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3416 3417 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3418 ierr = PetscFree2(im,jtmp);CHKERRQ(ierr); 3419 ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr); 3420 ierr = PetscFree(bdiag_rev);CHKERRQ(ierr); 3421 3422 ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 3423 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3424 3425 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3426 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 3427 if (row_identity && icol_identity) { 3428 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3429 } else { 3430 B->ops->solve = MatSolve_SeqAIJ; 3431 } 3432 3433 B->ops->solveadd = 0; 3434 B->ops->solvetranspose = 0; 3435 B->ops->solvetransposeadd = 0; 3436 B->ops->matsolve = 0; 3437 B->assembled = PETSC_TRUE; 3438 B->preallocated = PETSC_TRUE; 3439 PetscFunctionReturn(0); 3440 } 3441 3442 /* a wraper of MatILUDTFactor_SeqAIJ() */ 3443 /* 3444 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3445 */ 3446 3447 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 3448 { 3449 PetscErrorCode ierr; 3450 3451 PetscFunctionBegin; 3452 ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr); 3453 PetscFunctionReturn(0); 3454 } 3455 3456 /* 3457 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3458 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3459 */ 3460 /* 3461 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3462 */ 3463 3464 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 3465 { 3466 Mat C =fact; 3467 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 3468 IS isrow = b->row,isicol = b->icol; 3469 PetscErrorCode ierr; 3470 const PetscInt *r,*ic,*ics; 3471 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 3472 PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 3473 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 3474 PetscReal dt=info->dt,shift=info->shiftamount; 3475 PetscBool row_identity, col_identity; 3476 3477 PetscFunctionBegin; 3478 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3479 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3480 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 3481 ics = ic; 3482 3483 for (i=0; i<n; i++) { 3484 /* initialize rtmp array */ 3485 nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 3486 bjtmp = bj + bi[i]; 3487 for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 3488 rtmp[i] = 0.0; 3489 nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 3490 bjtmp = bj + bdiag[i+1] + 1; 3491 for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 3492 3493 /* load in initial unfactored row of A */ 3494 nz = ai[r[i]+1] - ai[r[i]]; 3495 ajtmp = aj + ai[r[i]]; 3496 v = aa + ai[r[i]]; 3497 for (j=0; j<nz; j++) { 3498 rtmp[ics[*ajtmp++]] = v[j]; 3499 } 3500 3501 /* numerical factorization */ 3502 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3503 nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 3504 k = 0; 3505 while (k < nzl) { 3506 row = *bjtmp++; 3507 pc = rtmp + row; 3508 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3509 multiplier = (*pc) * (*pv); 3510 *pc = multiplier; 3511 if (PetscAbsScalar(multiplier) > dt) { 3512 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3513 pv = b->a + bdiag[row+1] + 1; 3514 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3515 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3516 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3517 } 3518 k++; 3519 } 3520 3521 /* finished row so stick it into b->a */ 3522 /* L-part */ 3523 pv = b->a + bi[i]; 3524 pj = bj + bi[i]; 3525 nzl = bi[i+1] - bi[i]; 3526 for (j=0; j<nzl; j++) { 3527 pv[j] = rtmp[pj[j]]; 3528 } 3529 3530 /* diagonal: invert diagonal entries for simplier triangular solves */ 3531 if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 3532 b->a[bdiag[i]] = 1.0/rtmp[i]; 3533 3534 /* U-part */ 3535 pv = b->a + bdiag[i+1] + 1; 3536 pj = bj + bdiag[i+1] + 1; 3537 nzu = bdiag[i] - bdiag[i+1] - 1; 3538 for (j=0; j<nzu; j++) { 3539 pv[j] = rtmp[pj[j]]; 3540 } 3541 } 3542 3543 ierr = PetscFree(rtmp);CHKERRQ(ierr); 3544 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3545 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3546 3547 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3548 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 3549 if (row_identity && col_identity) { 3550 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3551 } else { 3552 C->ops->solve = MatSolve_SeqAIJ; 3553 } 3554 C->ops->solveadd = 0; 3555 C->ops->solvetranspose = 0; 3556 C->ops->solvetransposeadd = 0; 3557 C->ops->matsolve = 0; 3558 C->assembled = PETSC_TRUE; 3559 C->preallocated = PETSC_TRUE; 3560 3561 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 3562 PetscFunctionReturn(0); 3563 } 3564