1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscbt.h> 5 #include <../src/mat/utils/freespace.h> 6 7 /* 8 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 9 10 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 11 */ 12 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 13 { 14 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data; 15 PetscErrorCode ierr; 16 PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order; 17 const PetscInt *ai = a->i, *aj = a->j; 18 const PetscScalar *aa = a->a; 19 PetscBool *done; 20 PetscReal best,past = 0,future; 21 22 PetscFunctionBegin; 23 /* pick initial row */ 24 best = -1; 25 for (i=0; i<n; i++) { 26 future = 0.0; 27 for (j=ai[i]; j<ai[i+1]; j++) { 28 if (aj[j] != i) future += PetscAbsScalar(aa[j]); 29 else past = PetscAbsScalar(aa[j]); 30 } 31 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 32 if (past/future > best) { 33 best = past/future; 34 current = i; 35 } 36 } 37 38 ierr = PetscMalloc1(n,&done);CHKERRQ(ierr); 39 ierr = PetscArrayzero(done,n);CHKERRQ(ierr); 40 ierr = PetscMalloc1(n,&order);CHKERRQ(ierr); 41 order[0] = current; 42 for (i=0; i<n-1; i++) { 43 done[current] = PETSC_TRUE; 44 best = -1; 45 /* loop over all neighbors of current pivot */ 46 for (j=ai[current]; j<ai[current+1]; j++) { 47 jj = aj[j]; 48 if (done[jj]) continue; 49 /* loop over columns of potential next row computing weights for below and above diagonal */ 50 past = future = 0.0; 51 for (k=ai[jj]; k<ai[jj+1]; k++) { 52 kk = aj[k]; 53 if (done[kk]) past += PetscAbsScalar(aa[k]); 54 else if (kk != jj) future += PetscAbsScalar(aa[k]); 55 } 56 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 57 if (past/future > best) { 58 best = past/future; 59 newcurrent = jj; 60 } 61 } 62 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 63 best = -1; 64 for (k=0; k<n; k++) { 65 if (done[k]) continue; 66 future = 0.0; 67 past = 0.0; 68 for (j=ai[k]; j<ai[k+1]; j++) { 69 kk = aj[j]; 70 if (done[kk]) past += PetscAbsScalar(aa[j]); 71 else if (kk != k) future += PetscAbsScalar(aa[j]); 72 } 73 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 74 if (past/future > best) { 75 best = past/future; 76 newcurrent = k; 77 } 78 } 79 } 80 if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current"); 81 current = newcurrent; 82 order[i+1] = current; 83 } 84 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr); 85 *icol = *irow; 86 ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr); 87 ierr = PetscFree(done);CHKERRQ(ierr); 88 ierr = PetscFree(order);CHKERRQ(ierr); 89 PetscFunctionReturn(0); 90 } 91 92 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A,MatSolverType *type) 93 { 94 PetscFunctionBegin; 95 *type = MATSOLVERPETSC; 96 PetscFunctionReturn(0); 97 } 98 99 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 100 { 101 PetscInt n = A->rmap->n; 102 PetscErrorCode ierr; 103 104 PetscFunctionBegin; 105 #if defined(PETSC_USE_COMPLEX) 106 if (A->hermitian && !A->symmetric && (ftype == MAT_FACTOR_CHOLESKY||ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY or ICC Factor is not supported"); 107 #endif 108 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 109 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 110 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 111 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 112 113 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 114 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 115 116 ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr); 117 ierr = PetscStrallocpy(MATORDERINGND,(char**)&(*B)->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr); 118 ierr = PetscStrallocpy(MATORDERINGNATURAL,(char**)&(*B)->preferredordering[MAT_FACTOR_ILU]);CHKERRQ(ierr); 119 ierr = PetscStrallocpy(MATORDERINGNATURAL,(char**)&(*B)->preferredordering[MAT_FACTOR_ILUDT]);CHKERRQ(ierr); 120 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 121 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 122 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 123 124 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 125 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 126 ierr = PetscStrallocpy(MATORDERINGND,(char**)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]);CHKERRQ(ierr); 127 ierr = PetscStrallocpy(MATORDERINGNATURAL,(char**)&(*B)->preferredordering[MAT_FACTOR_ICC]);CHKERRQ(ierr); 128 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 129 (*B)->factortype = ftype; 130 131 ierr = PetscFree((*B)->solvertype);CHKERRQ(ierr); 132 ierr = PetscStrallocpy(MATSOLVERPETSC,&(*B)->solvertype);CHKERRQ(ierr); 133 (*B)->canuseordering = PETSC_TRUE; 134 ierr = PetscObjectComposeFunction((PetscObject)*B,"MatFactorGetSolverType_C",MatFactorGetSolverType_petsc);CHKERRQ(ierr); 135 PetscFunctionReturn(0); 136 } 137 138 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 139 { 140 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 141 IS isicol; 142 PetscErrorCode ierr; 143 const PetscInt *r,*ic; 144 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 145 PetscInt *bi,*bj,*ajtmp; 146 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 147 PetscReal f; 148 PetscInt nlnk,*lnk,k,**bi_ptr; 149 PetscFreeSpaceList free_space=NULL,current_space=NULL; 150 PetscBT lnkbt; 151 PetscBool missing; 152 153 PetscFunctionBegin; 154 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 155 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 156 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 157 158 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 159 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 160 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 161 162 /* get new row pointers */ 163 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 164 bi[0] = 0; 165 166 /* bdiag is location of diagonal in factor */ 167 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 168 bdiag[0] = 0; 169 170 /* linked list for storing column indices of the active row */ 171 nlnk = n + 1; 172 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 173 174 ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr); 175 176 /* initial FreeSpace size is f*(ai[n]+1) */ 177 f = info->fill; 178 if (n==1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 179 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); 180 current_space = free_space; 181 182 for (i=0; i<n; i++) { 183 /* copy previous fill into linked list */ 184 nzi = 0; 185 nnz = ai[r[i]+1] - ai[r[i]]; 186 ajtmp = aj + ai[r[i]]; 187 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 188 nzi += nlnk; 189 190 /* add pivot rows into linked list */ 191 row = lnk[n]; 192 while (row < i) { 193 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 194 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 195 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 196 nzi += nlnk; 197 row = lnk[row]; 198 } 199 bi[i+1] = bi[i] + nzi; 200 im[i] = nzi; 201 202 /* mark bdiag */ 203 nzbd = 0; 204 nnz = nzi; 205 k = lnk[n]; 206 while (nnz-- && k < i) { 207 nzbd++; 208 k = lnk[k]; 209 } 210 bdiag[i] = bi[i] + nzbd; 211 212 /* if free space is not available, make more free space */ 213 if (current_space->local_remaining<nzi) { 214 nnz = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */ 215 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 216 reallocs++; 217 } 218 219 /* copy data into free space, then initialize lnk */ 220 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 221 222 bi_ptr[i] = current_space->array; 223 current_space->array += nzi; 224 current_space->local_used += nzi; 225 current_space->local_remaining -= nzi; 226 } 227 #if defined(PETSC_USE_INFO) 228 if (ai[n] != 0) { 229 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 230 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 231 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 232 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); 233 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 234 } else { 235 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 236 } 237 #endif 238 239 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 240 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 241 242 /* destroy list of free space and other temporary array(s) */ 243 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 244 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 245 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 246 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 247 248 /* put together the new matrix */ 249 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 250 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 251 b = (Mat_SeqAIJ*)(B)->data; 252 253 b->free_a = PETSC_TRUE; 254 b->free_ij = PETSC_TRUE; 255 b->singlemalloc = PETSC_FALSE; 256 257 ierr = PetscMalloc1(bi[n]+1,&b->a);CHKERRQ(ierr); 258 b->j = bj; 259 b->i = bi; 260 b->diag = bdiag; 261 b->ilen = NULL; 262 b->imax = NULL; 263 b->row = isrow; 264 b->col = iscol; 265 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 266 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 267 b->icol = isicol; 268 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 269 270 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 271 ierr = PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 272 b->maxnz = b->nz = bi[n]; 273 274 (B)->factortype = MAT_FACTOR_LU; 275 (B)->info.factor_mallocs = reallocs; 276 (B)->info.fill_ratio_given = f; 277 278 if (ai[n]) { 279 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 280 } else { 281 (B)->info.fill_ratio_needed = 0.0; 282 } 283 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 284 if (a->inode.size) { 285 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 286 } 287 PetscFunctionReturn(0); 288 } 289 290 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 291 { 292 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 293 IS isicol; 294 PetscErrorCode ierr; 295 const PetscInt *r,*ic,*ai=a->i,*aj=a->j,*ajtmp; 296 PetscInt i,n=A->rmap->n; 297 PetscInt *bi,*bj; 298 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 299 PetscReal f; 300 PetscInt nlnk,*lnk,k,**bi_ptr; 301 PetscFreeSpaceList free_space=NULL,current_space=NULL; 302 PetscBT lnkbt; 303 PetscBool missing; 304 305 PetscFunctionBegin; 306 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 307 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 308 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 309 310 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 311 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 312 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 313 314 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 315 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 316 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 317 bi[0] = bdiag[0] = 0; 318 319 /* linked list for storing column indices of the active row */ 320 nlnk = n + 1; 321 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 322 323 ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr); 324 325 /* initial FreeSpace size is f*(ai[n]+1) */ 326 f = info->fill; 327 if (n==1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */ 328 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); 329 current_space = free_space; 330 331 for (i=0; i<n; i++) { 332 /* copy previous fill into linked list */ 333 nzi = 0; 334 nnz = ai[r[i]+1] - ai[r[i]]; 335 ajtmp = aj + ai[r[i]]; 336 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 337 nzi += nlnk; 338 339 /* add pivot rows into linked list */ 340 row = lnk[n]; 341 while (row < i) { 342 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 343 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 344 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 345 nzi += nlnk; 346 row = lnk[row]; 347 } 348 bi[i+1] = bi[i] + nzi; 349 im[i] = nzi; 350 351 /* mark bdiag */ 352 nzbd = 0; 353 nnz = nzi; 354 k = lnk[n]; 355 while (nnz-- && k < i) { 356 nzbd++; 357 k = lnk[k]; 358 } 359 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 360 361 /* if free space is not available, make more free space */ 362 if (current_space->local_remaining<nzi) { 363 /* estimated additional space needed */ 364 nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(n-1,nzi)); 365 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 366 reallocs++; 367 } 368 369 /* copy data into free space, then initialize lnk */ 370 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 371 372 bi_ptr[i] = current_space->array; 373 current_space->array += nzi; 374 current_space->local_used += nzi; 375 current_space->local_remaining -= nzi; 376 } 377 378 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 379 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 380 381 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 382 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 383 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 384 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 385 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 386 387 /* put together the new matrix */ 388 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 389 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 390 b = (Mat_SeqAIJ*)(B)->data; 391 392 b->free_a = PETSC_TRUE; 393 b->free_ij = PETSC_TRUE; 394 b->singlemalloc = PETSC_FALSE; 395 396 ierr = PetscMalloc1(bdiag[0]+1,&b->a);CHKERRQ(ierr); 397 398 b->j = bj; 399 b->i = bi; 400 b->diag = bdiag; 401 b->ilen = NULL; 402 b->imax = NULL; 403 b->row = isrow; 404 b->col = iscol; 405 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 406 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 407 b->icol = isicol; 408 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 409 410 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 411 ierr = PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 412 b->maxnz = b->nz = bdiag[0]+1; 413 414 B->factortype = MAT_FACTOR_LU; 415 B->info.factor_mallocs = reallocs; 416 B->info.fill_ratio_given = f; 417 418 if (ai[n]) { 419 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 420 } else { 421 B->info.fill_ratio_needed = 0.0; 422 } 423 #if defined(PETSC_USE_INFO) 424 if (ai[n] != 0) { 425 PetscReal af = B->info.fill_ratio_needed; 426 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 427 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 428 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); 429 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 430 } else { 431 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 432 } 433 #endif 434 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 435 if (a->inode.size) { 436 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 437 } 438 ierr = MatSeqAIJCheckInode_FactorLU(B);CHKERRQ(ierr); 439 PetscFunctionReturn(0); 440 } 441 442 /* 443 Trouble in factorization, should we dump the original matrix? 444 */ 445 PetscErrorCode MatFactorDumpMatrix(Mat A) 446 { 447 PetscErrorCode ierr; 448 PetscBool flg = PETSC_FALSE; 449 450 PetscFunctionBegin; 451 ierr = PetscOptionsGetBool(((PetscObject)A)->options,NULL,"-mat_factor_dump_on_error",&flg,NULL);CHKERRQ(ierr); 452 if (flg) { 453 PetscViewer viewer; 454 char filename[PETSC_MAX_PATH_LEN]; 455 456 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 457 ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 458 ierr = MatView(A,viewer);CHKERRQ(ierr); 459 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 460 } 461 PetscFunctionReturn(0); 462 } 463 464 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 465 { 466 Mat C =B; 467 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 468 IS isrow = b->row,isicol = b->icol; 469 PetscErrorCode ierr; 470 const PetscInt *r,*ic,*ics; 471 const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag; 472 PetscInt i,j,k,nz,nzL,row,*pj; 473 const PetscInt *ajtmp,*bjtmp; 474 MatScalar *rtmp,*pc,multiplier,*pv; 475 const MatScalar *aa=a->a,*v; 476 PetscBool row_identity,col_identity; 477 FactorShiftCtx sctx; 478 const PetscInt *ddiag; 479 PetscReal rs; 480 MatScalar d; 481 482 PetscFunctionBegin; 483 /* MatPivotSetUp(): initialize shift context sctx */ 484 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 485 486 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 487 ddiag = a->diag; 488 sctx.shift_top = info->zeropivot; 489 for (i=0; i<n; i++) { 490 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 491 d = (aa)[ddiag[i]]; 492 rs = -PetscAbsScalar(d) - PetscRealPart(d); 493 v = aa+ai[i]; 494 nz = ai[i+1] - ai[i]; 495 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 496 if (rs>sctx.shift_top) sctx.shift_top = rs; 497 } 498 sctx.shift_top *= 1.1; 499 sctx.nshift_max = 5; 500 sctx.shift_lo = 0.; 501 sctx.shift_hi = 1.; 502 } 503 504 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 505 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 506 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 507 ics = ic; 508 509 do { 510 sctx.newshift = PETSC_FALSE; 511 for (i=0; i<n; i++) { 512 /* zero rtmp */ 513 /* L part */ 514 nz = bi[i+1] - bi[i]; 515 bjtmp = bj + bi[i]; 516 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 517 518 /* U part */ 519 nz = bdiag[i]-bdiag[i+1]; 520 bjtmp = bj + bdiag[i+1]+1; 521 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 522 523 /* load in initial (unfactored row) */ 524 nz = ai[r[i]+1] - ai[r[i]]; 525 ajtmp = aj + ai[r[i]]; 526 v = aa + ai[r[i]]; 527 for (j=0; j<nz; j++) { 528 rtmp[ics[ajtmp[j]]] = v[j]; 529 } 530 /* ZeropivotApply() */ 531 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 532 533 /* elimination */ 534 bjtmp = bj + bi[i]; 535 row = *bjtmp++; 536 nzL = bi[i+1] - bi[i]; 537 for (k=0; k < nzL; k++) { 538 pc = rtmp + row; 539 if (*pc != 0.0) { 540 pv = b->a + bdiag[row]; 541 multiplier = *pc * (*pv); 542 *pc = multiplier; 543 544 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 545 pv = b->a + bdiag[row+1]+1; 546 nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */ 547 548 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 549 ierr = PetscLogFlops(1+2.0*nz);CHKERRQ(ierr); 550 } 551 row = *bjtmp++; 552 } 553 554 /* finished row so stick it into b->a */ 555 rs = 0.0; 556 /* L part */ 557 pv = b->a + bi[i]; 558 pj = b->j + bi[i]; 559 nz = bi[i+1] - bi[i]; 560 for (j=0; j<nz; j++) { 561 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 562 } 563 564 /* U part */ 565 pv = b->a + bdiag[i+1]+1; 566 pj = b->j + bdiag[i+1]+1; 567 nz = bdiag[i] - bdiag[i+1]-1; 568 for (j=0; j<nz; j++) { 569 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 570 } 571 572 sctx.rs = rs; 573 sctx.pv = rtmp[i]; 574 ierr = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr); 575 if (sctx.newshift) break; /* break for-loop */ 576 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 577 578 /* Mark diagonal and invert diagonal for simpler triangular solves */ 579 pv = b->a + bdiag[i]; 580 *pv = 1.0/rtmp[i]; 581 582 } /* endof for (i=0; i<n; i++) { */ 583 584 /* MatPivotRefine() */ 585 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 586 /* 587 * if no shift in this attempt & shifting & started shifting & can refine, 588 * then try lower shift 589 */ 590 sctx.shift_hi = sctx.shift_fraction; 591 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 592 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 593 sctx.newshift = PETSC_TRUE; 594 sctx.nshift++; 595 } 596 } while (sctx.newshift); 597 598 ierr = PetscFree(rtmp);CHKERRQ(ierr); 599 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 600 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 601 602 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 603 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 604 if (b->inode.size) { 605 C->ops->solve = MatSolve_SeqAIJ_Inode; 606 } else if (row_identity && col_identity) { 607 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 608 } else { 609 C->ops->solve = MatSolve_SeqAIJ; 610 } 611 C->ops->solveadd = MatSolveAdd_SeqAIJ; 612 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 613 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 614 C->ops->matsolve = MatMatSolve_SeqAIJ; 615 C->assembled = PETSC_TRUE; 616 C->preallocated = PETSC_TRUE; 617 618 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 619 620 /* MatShiftView(A,info,&sctx) */ 621 if (sctx.nshift) { 622 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 623 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 624 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 625 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 626 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 627 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr); 628 } 629 } 630 PetscFunctionReturn(0); 631 } 632 633 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 634 { 635 Mat C =B; 636 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 637 IS isrow = b->row,isicol = b->icol; 638 PetscErrorCode ierr; 639 const PetscInt *r,*ic,*ics; 640 PetscInt nz,row,i,j,n=A->rmap->n,diag; 641 const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 642 const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj; 643 MatScalar *pv,*rtmp,*pc,multiplier,d; 644 const MatScalar *v,*aa=a->a; 645 PetscReal rs=0.0; 646 FactorShiftCtx sctx; 647 const PetscInt *ddiag; 648 PetscBool row_identity, col_identity; 649 650 PetscFunctionBegin; 651 /* MatPivotSetUp(): initialize shift context sctx */ 652 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 653 654 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 655 ddiag = a->diag; 656 sctx.shift_top = info->zeropivot; 657 for (i=0; i<n; i++) { 658 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 659 d = (aa)[ddiag[i]]; 660 rs = -PetscAbsScalar(d) - PetscRealPart(d); 661 v = aa+ai[i]; 662 nz = ai[i+1] - ai[i]; 663 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 664 if (rs>sctx.shift_top) sctx.shift_top = rs; 665 } 666 sctx.shift_top *= 1.1; 667 sctx.nshift_max = 5; 668 sctx.shift_lo = 0.; 669 sctx.shift_hi = 1.; 670 } 671 672 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 673 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 674 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 675 ics = ic; 676 677 do { 678 sctx.newshift = PETSC_FALSE; 679 for (i=0; i<n; i++) { 680 nz = bi[i+1] - bi[i]; 681 bjtmp = bj + bi[i]; 682 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 683 684 /* load in initial (unfactored row) */ 685 nz = ai[r[i]+1] - ai[r[i]]; 686 ajtmp = aj + ai[r[i]]; 687 v = aa + ai[r[i]]; 688 for (j=0; j<nz; j++) { 689 rtmp[ics[ajtmp[j]]] = v[j]; 690 } 691 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 692 693 row = *bjtmp++; 694 while (row < i) { 695 pc = rtmp + row; 696 if (*pc != 0.0) { 697 pv = b->a + diag_offset[row]; 698 pj = b->j + diag_offset[row] + 1; 699 multiplier = *pc / *pv++; 700 *pc = multiplier; 701 nz = bi[row+1] - diag_offset[row] - 1; 702 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 703 ierr = PetscLogFlops(1+2.0*nz);CHKERRQ(ierr); 704 } 705 row = *bjtmp++; 706 } 707 /* finished row so stick it into b->a */ 708 pv = b->a + bi[i]; 709 pj = b->j + bi[i]; 710 nz = bi[i+1] - bi[i]; 711 diag = diag_offset[i] - bi[i]; 712 rs = 0.0; 713 for (j=0; j<nz; j++) { 714 pv[j] = rtmp[pj[j]]; 715 rs += PetscAbsScalar(pv[j]); 716 } 717 rs -= PetscAbsScalar(pv[diag]); 718 719 sctx.rs = rs; 720 sctx.pv = pv[diag]; 721 ierr = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr); 722 if (sctx.newshift) break; 723 pv[diag] = sctx.pv; 724 } 725 726 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 727 /* 728 * if no shift in this attempt & shifting & started shifting & can refine, 729 * then try lower shift 730 */ 731 sctx.shift_hi = sctx.shift_fraction; 732 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 733 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 734 sctx.newshift = PETSC_TRUE; 735 sctx.nshift++; 736 } 737 } while (sctx.newshift); 738 739 /* invert diagonal entries for simpler triangular solves */ 740 for (i=0; i<n; i++) { 741 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 742 } 743 ierr = PetscFree(rtmp);CHKERRQ(ierr); 744 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 745 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 746 747 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 748 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 749 if (row_identity && col_identity) { 750 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 751 } else { 752 C->ops->solve = MatSolve_SeqAIJ_inplace; 753 } 754 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 755 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 756 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 757 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 758 759 C->assembled = PETSC_TRUE; 760 C->preallocated = PETSC_TRUE; 761 762 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 763 if (sctx.nshift) { 764 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 765 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 766 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 767 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 768 } 769 } 770 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 771 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 772 773 ierr = MatSeqAIJCheckInode(C);CHKERRQ(ierr); 774 PetscFunctionReturn(0); 775 } 776 777 /* 778 This routine implements inplace ILU(0) with row or/and column permutations. 779 Input: 780 A - original matrix 781 Output; 782 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 783 a->j (col index) is permuted by the inverse of colperm, then sorted 784 a->a reordered accordingly with a->j 785 a->diag (ptr to diagonal elements) is updated. 786 */ 787 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 788 { 789 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data; 790 IS isrow = a->row,isicol = a->icol; 791 PetscErrorCode ierr; 792 const PetscInt *r,*ic,*ics; 793 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 794 PetscInt *ajtmp,nz,row; 795 PetscInt *diag = a->diag,nbdiag,*pj; 796 PetscScalar *rtmp,*pc,multiplier,d; 797 MatScalar *pv,*v; 798 PetscReal rs; 799 FactorShiftCtx sctx; 800 const MatScalar *aa=a->a,*vtmp; 801 802 PetscFunctionBegin; 803 if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 804 805 /* MatPivotSetUp(): initialize shift context sctx */ 806 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 807 808 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 809 const PetscInt *ddiag = a->diag; 810 sctx.shift_top = info->zeropivot; 811 for (i=0; i<n; i++) { 812 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 813 d = (aa)[ddiag[i]]; 814 rs = -PetscAbsScalar(d) - PetscRealPart(d); 815 vtmp = aa+ai[i]; 816 nz = ai[i+1] - ai[i]; 817 for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]); 818 if (rs>sctx.shift_top) sctx.shift_top = rs; 819 } 820 sctx.shift_top *= 1.1; 821 sctx.nshift_max = 5; 822 sctx.shift_lo = 0.; 823 sctx.shift_hi = 1.; 824 } 825 826 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 827 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 828 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 829 ierr = PetscArrayzero(rtmp,n+1);CHKERRQ(ierr); 830 ics = ic; 831 832 #if defined(MV) 833 sctx.shift_top = 0.; 834 sctx.nshift_max = 0; 835 sctx.shift_lo = 0.; 836 sctx.shift_hi = 0.; 837 sctx.shift_fraction = 0.; 838 839 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 840 sctx.shift_top = 0.; 841 for (i=0; i<n; i++) { 842 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 843 d = (a->a)[diag[i]]; 844 rs = -PetscAbsScalar(d) - PetscRealPart(d); 845 v = a->a+ai[i]; 846 nz = ai[i+1] - ai[i]; 847 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 848 if (rs>sctx.shift_top) sctx.shift_top = rs; 849 } 850 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 851 sctx.shift_top *= 1.1; 852 sctx.nshift_max = 5; 853 sctx.shift_lo = 0.; 854 sctx.shift_hi = 1.; 855 } 856 857 sctx.shift_amount = 0.; 858 sctx.nshift = 0; 859 #endif 860 861 do { 862 sctx.newshift = PETSC_FALSE; 863 for (i=0; i<n; i++) { 864 /* load in initial unfactored row */ 865 nz = ai[r[i]+1] - ai[r[i]]; 866 ajtmp = aj + ai[r[i]]; 867 v = a->a + ai[r[i]]; 868 /* sort permuted ajtmp and values v accordingly */ 869 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 870 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 871 872 diag[r[i]] = ai[r[i]]; 873 for (j=0; j<nz; j++) { 874 rtmp[ajtmp[j]] = v[j]; 875 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 876 } 877 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 878 879 row = *ajtmp++; 880 while (row < i) { 881 pc = rtmp + row; 882 if (*pc != 0.0) { 883 pv = a->a + diag[r[row]]; 884 pj = aj + diag[r[row]] + 1; 885 886 multiplier = *pc / *pv++; 887 *pc = multiplier; 888 nz = ai[r[row]+1] - diag[r[row]] - 1; 889 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 890 ierr = PetscLogFlops(1+2.0*nz);CHKERRQ(ierr); 891 } 892 row = *ajtmp++; 893 } 894 /* finished row so overwrite it onto a->a */ 895 pv = a->a + ai[r[i]]; 896 pj = aj + ai[r[i]]; 897 nz = ai[r[i]+1] - ai[r[i]]; 898 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 899 900 rs = 0.0; 901 for (j=0; j<nz; j++) { 902 pv[j] = rtmp[pj[j]]; 903 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 904 } 905 906 sctx.rs = rs; 907 sctx.pv = pv[nbdiag]; 908 ierr = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr); 909 if (sctx.newshift) break; 910 pv[nbdiag] = sctx.pv; 911 } 912 913 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 914 /* 915 * if no shift in this attempt & shifting & started shifting & can refine, 916 * then try lower shift 917 */ 918 sctx.shift_hi = sctx.shift_fraction; 919 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 920 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 921 sctx.newshift = PETSC_TRUE; 922 sctx.nshift++; 923 } 924 } while (sctx.newshift); 925 926 /* invert diagonal entries for simpler triangular solves */ 927 for (i=0; i<n; i++) { 928 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 929 } 930 931 ierr = PetscFree(rtmp);CHKERRQ(ierr); 932 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 933 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 934 935 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 936 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 937 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 938 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 939 940 A->assembled = PETSC_TRUE; 941 A->preallocated = PETSC_TRUE; 942 943 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 944 if (sctx.nshift) { 945 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 946 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 947 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 948 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 949 } 950 } 951 PetscFunctionReturn(0); 952 } 953 954 /* ----------------------------------------------------------- */ 955 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 956 { 957 PetscErrorCode ierr; 958 Mat C; 959 960 PetscFunctionBegin; 961 ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 962 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 963 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 964 965 A->ops->solve = C->ops->solve; 966 A->ops->solvetranspose = C->ops->solvetranspose; 967 968 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 969 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 970 PetscFunctionReturn(0); 971 } 972 /* ----------------------------------------------------------- */ 973 974 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 975 { 976 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 977 IS iscol = a->col,isrow = a->row; 978 PetscErrorCode ierr; 979 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 980 PetscInt nz; 981 const PetscInt *rout,*cout,*r,*c; 982 PetscScalar *x,*tmp,*tmps,sum; 983 const PetscScalar *b; 984 const MatScalar *aa = a->a,*v; 985 986 PetscFunctionBegin; 987 if (!n) PetscFunctionReturn(0); 988 989 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 990 ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr); 991 tmp = a->solve_work; 992 993 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 994 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 995 996 /* forward solve the lower triangular */ 997 tmp[0] = b[*r++]; 998 tmps = tmp; 999 for (i=1; i<n; i++) { 1000 v = aa + ai[i]; 1001 vi = aj + ai[i]; 1002 nz = a->diag[i] - ai[i]; 1003 sum = b[*r++]; 1004 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1005 tmp[i] = sum; 1006 } 1007 1008 /* backward solve the upper triangular */ 1009 for (i=n-1; i>=0; i--) { 1010 v = aa + a->diag[i] + 1; 1011 vi = aj + a->diag[i] + 1; 1012 nz = ai[i+1] - a->diag[i] - 1; 1013 sum = tmp[i]; 1014 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1015 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 1016 } 1017 1018 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1019 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1020 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1021 ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr); 1022 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1023 PetscFunctionReturn(0); 1024 } 1025 1026 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X) 1027 { 1028 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1029 IS iscol = a->col,isrow = a->row; 1030 PetscErrorCode ierr; 1031 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1032 PetscInt nz,neq,ldb,ldx; 1033 const PetscInt *rout,*cout,*r,*c; 1034 PetscScalar *x,*tmp = a->solve_work,*tmps,sum; 1035 const PetscScalar *b,*aa = a->a,*v; 1036 PetscBool isdense; 1037 1038 PetscFunctionBegin; 1039 if (!n) PetscFunctionReturn(0); 1040 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&isdense);CHKERRQ(ierr); 1041 if (!isdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1042 if (X != B) { 1043 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&isdense);CHKERRQ(ierr); 1044 if (!isdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1045 } 1046 ierr = MatDenseGetArrayRead(B,&b);CHKERRQ(ierr); 1047 ierr = MatDenseGetLDA(B,&ldb);CHKERRQ(ierr); 1048 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1049 ierr = MatDenseGetLDA(X,&ldx);CHKERRQ(ierr); 1050 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1051 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1052 for (neq=0; neq<B->cmap->n; neq++) { 1053 /* forward solve the lower triangular */ 1054 tmp[0] = b[r[0]]; 1055 tmps = tmp; 1056 for (i=1; i<n; i++) { 1057 v = aa + ai[i]; 1058 vi = aj + ai[i]; 1059 nz = a->diag[i] - ai[i]; 1060 sum = b[r[i]]; 1061 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1062 tmp[i] = sum; 1063 } 1064 /* backward solve the upper triangular */ 1065 for (i=n-1; i>=0; i--) { 1066 v = aa + a->diag[i] + 1; 1067 vi = aj + a->diag[i] + 1; 1068 nz = ai[i+1] - a->diag[i] - 1; 1069 sum = tmp[i]; 1070 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1071 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 1072 } 1073 b += ldb; 1074 x += ldx; 1075 } 1076 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1077 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1078 ierr = MatDenseRestoreArrayRead(B,&b);CHKERRQ(ierr); 1079 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1080 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1081 PetscFunctionReturn(0); 1082 } 1083 1084 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 1085 { 1086 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1087 IS iscol = a->col,isrow = a->row; 1088 PetscErrorCode ierr; 1089 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1090 PetscInt nz,neq,ldb,ldx; 1091 const PetscInt *rout,*cout,*r,*c; 1092 PetscScalar *x,*tmp = a->solve_work,sum; 1093 const PetscScalar *b,*aa = a->a,*v; 1094 PetscBool isdense; 1095 1096 PetscFunctionBegin; 1097 if (!n) PetscFunctionReturn(0); 1098 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&isdense);CHKERRQ(ierr); 1099 if (!isdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1100 if (X != B) { 1101 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&isdense);CHKERRQ(ierr); 1102 if (!isdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1103 } 1104 ierr = MatDenseGetArrayRead(B,&b);CHKERRQ(ierr); 1105 ierr = MatDenseGetLDA(B,&ldb);CHKERRQ(ierr); 1106 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1107 ierr = MatDenseGetLDA(X,&ldx);CHKERRQ(ierr); 1108 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1109 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1110 for (neq=0; neq<B->cmap->n; neq++) { 1111 /* forward solve the lower triangular */ 1112 tmp[0] = b[r[0]]; 1113 v = aa; 1114 vi = aj; 1115 for (i=1; i<n; i++) { 1116 nz = ai[i+1] - ai[i]; 1117 sum = b[r[i]]; 1118 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1119 tmp[i] = sum; 1120 v += nz; vi += nz; 1121 } 1122 /* backward solve the upper triangular */ 1123 for (i=n-1; i>=0; i--) { 1124 v = aa + adiag[i+1]+1; 1125 vi = aj + adiag[i+1]+1; 1126 nz = adiag[i]-adiag[i+1]-1; 1127 sum = tmp[i]; 1128 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1129 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 1130 } 1131 b += ldb; 1132 x += ldx; 1133 } 1134 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1135 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1136 ierr = MatDenseRestoreArrayRead(B,&b);CHKERRQ(ierr); 1137 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1138 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1139 PetscFunctionReturn(0); 1140 } 1141 1142 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 1143 { 1144 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1145 IS iscol = a->col,isrow = a->row; 1146 PetscErrorCode ierr; 1147 const PetscInt *r,*c,*rout,*cout; 1148 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1149 PetscInt nz,row; 1150 PetscScalar *x,*tmp,*tmps,sum; 1151 const PetscScalar *b; 1152 const MatScalar *aa = a->a,*v; 1153 1154 PetscFunctionBegin; 1155 if (!n) PetscFunctionReturn(0); 1156 1157 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1158 ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr); 1159 tmp = a->solve_work; 1160 1161 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1162 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1163 1164 /* forward solve the lower triangular */ 1165 tmp[0] = b[*r++]; 1166 tmps = tmp; 1167 for (row=1; row<n; row++) { 1168 i = rout[row]; /* permuted row */ 1169 v = aa + ai[i]; 1170 vi = aj + ai[i]; 1171 nz = a->diag[i] - ai[i]; 1172 sum = b[*r++]; 1173 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1174 tmp[row] = sum; 1175 } 1176 1177 /* backward solve the upper triangular */ 1178 for (row=n-1; row>=0; row--) { 1179 i = rout[row]; /* permuted row */ 1180 v = aa + a->diag[i] + 1; 1181 vi = aj + a->diag[i] + 1; 1182 nz = ai[i+1] - a->diag[i] - 1; 1183 sum = tmp[row]; 1184 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1185 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 1186 } 1187 1188 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1189 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1190 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1191 ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr); 1192 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1193 PetscFunctionReturn(0); 1194 } 1195 1196 /* ----------------------------------------------------------- */ 1197 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1198 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1199 { 1200 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1201 PetscErrorCode ierr; 1202 PetscInt n = A->rmap->n; 1203 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag; 1204 PetscScalar *x; 1205 const PetscScalar *b; 1206 const MatScalar *aa = a->a; 1207 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1208 PetscInt adiag_i,i,nz,ai_i; 1209 const PetscInt *vi; 1210 const MatScalar *v; 1211 PetscScalar sum; 1212 #endif 1213 1214 PetscFunctionBegin; 1215 if (!n) PetscFunctionReturn(0); 1216 1217 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1218 ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr); 1219 1220 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1221 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 1222 #else 1223 /* forward solve the lower triangular */ 1224 x[0] = b[0]; 1225 for (i=1; i<n; i++) { 1226 ai_i = ai[i]; 1227 v = aa + ai_i; 1228 vi = aj + ai_i; 1229 nz = adiag[i] - ai_i; 1230 sum = b[i]; 1231 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1232 x[i] = sum; 1233 } 1234 1235 /* backward solve the upper triangular */ 1236 for (i=n-1; i>=0; i--) { 1237 adiag_i = adiag[i]; 1238 v = aa + adiag_i + 1; 1239 vi = aj + adiag_i + 1; 1240 nz = ai[i+1] - adiag_i - 1; 1241 sum = x[i]; 1242 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1243 x[i] = sum*aa[adiag_i]; 1244 } 1245 #endif 1246 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1247 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1248 ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx) 1253 { 1254 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1255 IS iscol = a->col,isrow = a->row; 1256 PetscErrorCode ierr; 1257 PetscInt i, n = A->rmap->n,j; 1258 PetscInt nz; 1259 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j; 1260 PetscScalar *x,*tmp,sum; 1261 const PetscScalar *b; 1262 const MatScalar *aa = a->a,*v; 1263 1264 PetscFunctionBegin; 1265 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1266 1267 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1268 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1269 tmp = a->solve_work; 1270 1271 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1272 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1273 1274 /* forward solve the lower triangular */ 1275 tmp[0] = b[*r++]; 1276 for (i=1; i<n; i++) { 1277 v = aa + ai[i]; 1278 vi = aj + ai[i]; 1279 nz = a->diag[i] - ai[i]; 1280 sum = b[*r++]; 1281 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1282 tmp[i] = sum; 1283 } 1284 1285 /* backward solve the upper triangular */ 1286 for (i=n-1; i>=0; i--) { 1287 v = aa + a->diag[i] + 1; 1288 vi = aj + a->diag[i] + 1; 1289 nz = ai[i+1] - a->diag[i] - 1; 1290 sum = tmp[i]; 1291 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1292 tmp[i] = sum*aa[a->diag[i]]; 1293 x[*c--] += tmp[i]; 1294 } 1295 1296 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1297 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1298 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1299 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1300 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1301 PetscFunctionReturn(0); 1302 } 1303 1304 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1305 { 1306 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1307 IS iscol = a->col,isrow = a->row; 1308 PetscErrorCode ierr; 1309 PetscInt i, n = A->rmap->n,j; 1310 PetscInt nz; 1311 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1312 PetscScalar *x,*tmp,sum; 1313 const PetscScalar *b; 1314 const MatScalar *aa = a->a,*v; 1315 1316 PetscFunctionBegin; 1317 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1318 1319 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1320 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1321 tmp = a->solve_work; 1322 1323 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1324 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1325 1326 /* forward solve the lower triangular */ 1327 tmp[0] = b[r[0]]; 1328 v = aa; 1329 vi = aj; 1330 for (i=1; i<n; i++) { 1331 nz = ai[i+1] - ai[i]; 1332 sum = b[r[i]]; 1333 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1334 tmp[i] = sum; 1335 v += nz; 1336 vi += nz; 1337 } 1338 1339 /* backward solve the upper triangular */ 1340 v = aa + adiag[n-1]; 1341 vi = aj + adiag[n-1]; 1342 for (i=n-1; i>=0; i--) { 1343 nz = adiag[i] - adiag[i+1] - 1; 1344 sum = tmp[i]; 1345 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1346 tmp[i] = sum*v[nz]; 1347 x[c[i]] += tmp[i]; 1348 v += nz+1; vi += nz+1; 1349 } 1350 1351 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1352 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1353 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1354 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1355 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1356 PetscFunctionReturn(0); 1357 } 1358 1359 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 1360 { 1361 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1362 IS iscol = a->col,isrow = a->row; 1363 PetscErrorCode ierr; 1364 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1365 PetscInt i,n = A->rmap->n,j; 1366 PetscInt nz; 1367 PetscScalar *x,*tmp,s1; 1368 const MatScalar *aa = a->a,*v; 1369 const PetscScalar *b; 1370 1371 PetscFunctionBegin; 1372 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1373 ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr); 1374 tmp = a->solve_work; 1375 1376 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1377 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1378 1379 /* copy the b into temp work space according to permutation */ 1380 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1381 1382 /* forward solve the U^T */ 1383 for (i=0; i<n; i++) { 1384 v = aa + diag[i]; 1385 vi = aj + diag[i] + 1; 1386 nz = ai[i+1] - diag[i] - 1; 1387 s1 = tmp[i]; 1388 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1389 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1390 tmp[i] = s1; 1391 } 1392 1393 /* backward solve the L^T */ 1394 for (i=n-1; i>=0; i--) { 1395 v = aa + diag[i] - 1; 1396 vi = aj + diag[i] - 1; 1397 nz = diag[i] - ai[i]; 1398 s1 = tmp[i]; 1399 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1400 } 1401 1402 /* copy tmp into x according to permutation */ 1403 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1404 1405 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1406 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1407 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1408 ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr); 1409 1410 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1411 PetscFunctionReturn(0); 1412 } 1413 1414 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1415 { 1416 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1417 IS iscol = a->col,isrow = a->row; 1418 PetscErrorCode ierr; 1419 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1420 PetscInt i,n = A->rmap->n,j; 1421 PetscInt nz; 1422 PetscScalar *x,*tmp,s1; 1423 const MatScalar *aa = a->a,*v; 1424 const PetscScalar *b; 1425 1426 PetscFunctionBegin; 1427 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1428 ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr); 1429 tmp = a->solve_work; 1430 1431 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1432 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1433 1434 /* copy the b into temp work space according to permutation */ 1435 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1436 1437 /* forward solve the U^T */ 1438 for (i=0; i<n; i++) { 1439 v = aa + adiag[i+1] + 1; 1440 vi = aj + adiag[i+1] + 1; 1441 nz = adiag[i] - adiag[i+1] - 1; 1442 s1 = tmp[i]; 1443 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1444 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1445 tmp[i] = s1; 1446 } 1447 1448 /* backward solve the L^T */ 1449 for (i=n-1; i>=0; i--) { 1450 v = aa + ai[i]; 1451 vi = aj + ai[i]; 1452 nz = ai[i+1] - ai[i]; 1453 s1 = tmp[i]; 1454 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1455 } 1456 1457 /* copy tmp into x according to permutation */ 1458 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1459 1460 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1461 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1462 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1463 ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr); 1464 1465 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1466 PetscFunctionReturn(0); 1467 } 1468 1469 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx) 1470 { 1471 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1472 IS iscol = a->col,isrow = a->row; 1473 PetscErrorCode ierr; 1474 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1475 PetscInt i,n = A->rmap->n,j; 1476 PetscInt nz; 1477 PetscScalar *x,*tmp,s1; 1478 const MatScalar *aa = a->a,*v; 1479 const PetscScalar *b; 1480 1481 PetscFunctionBegin; 1482 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1483 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1484 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1485 tmp = a->solve_work; 1486 1487 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1488 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1489 1490 /* copy the b into temp work space according to permutation */ 1491 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1492 1493 /* forward solve the U^T */ 1494 for (i=0; i<n; i++) { 1495 v = aa + diag[i]; 1496 vi = aj + diag[i] + 1; 1497 nz = ai[i+1] - diag[i] - 1; 1498 s1 = tmp[i]; 1499 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1500 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1501 tmp[i] = s1; 1502 } 1503 1504 /* backward solve the L^T */ 1505 for (i=n-1; i>=0; i--) { 1506 v = aa + diag[i] - 1; 1507 vi = aj + diag[i] - 1; 1508 nz = diag[i] - ai[i]; 1509 s1 = tmp[i]; 1510 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1511 } 1512 1513 /* copy tmp into x according to permutation */ 1514 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1515 1516 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1517 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1518 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1519 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1520 1521 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1522 PetscFunctionReturn(0); 1523 } 1524 1525 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1526 { 1527 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1528 IS iscol = a->col,isrow = a->row; 1529 PetscErrorCode ierr; 1530 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1531 PetscInt i,n = A->rmap->n,j; 1532 PetscInt nz; 1533 PetscScalar *x,*tmp,s1; 1534 const MatScalar *aa = a->a,*v; 1535 const PetscScalar *b; 1536 1537 PetscFunctionBegin; 1538 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1539 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1540 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1541 tmp = a->solve_work; 1542 1543 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1544 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1545 1546 /* copy the b into temp work space according to permutation */ 1547 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1548 1549 /* forward solve the U^T */ 1550 for (i=0; i<n; i++) { 1551 v = aa + adiag[i+1] + 1; 1552 vi = aj + adiag[i+1] + 1; 1553 nz = adiag[i] - adiag[i+1] - 1; 1554 s1 = tmp[i]; 1555 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1556 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1557 tmp[i] = s1; 1558 } 1559 1560 /* backward solve the L^T */ 1561 for (i=n-1; i>=0; i--) { 1562 v = aa + ai[i]; 1563 vi = aj + ai[i]; 1564 nz = ai[i+1] - ai[i]; 1565 s1 = tmp[i]; 1566 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1567 } 1568 1569 /* copy tmp into x according to permutation */ 1570 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1571 1572 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1573 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1574 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1575 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1576 1577 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1578 PetscFunctionReturn(0); 1579 } 1580 1581 /* ----------------------------------------------------------------*/ 1582 1583 /* 1584 ilu() under revised new data structure. 1585 Factored arrays bj and ba are stored as 1586 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1587 1588 bi=fact->i is an array of size n+1, in which 1589 bi+ 1590 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1591 bi[n]: points to L(n-1,n-1)+1 1592 1593 bdiag=fact->diag is an array of size n+1,in which 1594 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1595 bdiag[n]: points to entry of U(n-1,0)-1 1596 1597 U(i,:) contains bdiag[i] as its last entry, i.e., 1598 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1599 */ 1600 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1601 { 1602 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1603 PetscErrorCode ierr; 1604 const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1605 PetscInt i,j,k=0,nz,*bi,*bj,*bdiag; 1606 IS isicol; 1607 1608 PetscFunctionBegin; 1609 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1610 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 1611 b = (Mat_SeqAIJ*)(fact)->data; 1612 1613 /* allocate matrix arrays for new data structure */ 1614 ierr = PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);CHKERRQ(ierr); 1615 ierr = PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1616 1617 b->singlemalloc = PETSC_TRUE; 1618 if (!b->diag) { 1619 ierr = PetscMalloc1(n+1,&b->diag);CHKERRQ(ierr); 1620 ierr = PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1621 } 1622 bdiag = b->diag; 1623 1624 if (n > 0) { 1625 ierr = PetscArrayzero(b->a,ai[n]);CHKERRQ(ierr); 1626 } 1627 1628 /* set bi and bj with new data structure */ 1629 bi = b->i; 1630 bj = b->j; 1631 1632 /* L part */ 1633 bi[0] = 0; 1634 for (i=0; i<n; i++) { 1635 nz = adiag[i] - ai[i]; 1636 bi[i+1] = bi[i] + nz; 1637 aj = a->j + ai[i]; 1638 for (j=0; j<nz; j++) { 1639 /* *bj = aj[j]; bj++; */ 1640 bj[k++] = aj[j]; 1641 } 1642 } 1643 1644 /* U part */ 1645 bdiag[n] = bi[n]-1; 1646 for (i=n-1; i>=0; i--) { 1647 nz = ai[i+1] - adiag[i] - 1; 1648 aj = a->j + adiag[i] + 1; 1649 for (j=0; j<nz; j++) { 1650 /* *bj = aj[j]; bj++; */ 1651 bj[k++] = aj[j]; 1652 } 1653 /* diag[i] */ 1654 /* *bj = i; bj++; */ 1655 bj[k++] = i; 1656 bdiag[i] = bdiag[i+1] + nz + 1; 1657 } 1658 1659 fact->factortype = MAT_FACTOR_ILU; 1660 fact->info.factor_mallocs = 0; 1661 fact->info.fill_ratio_given = info->fill; 1662 fact->info.fill_ratio_needed = 1.0; 1663 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1664 ierr = MatSeqAIJCheckInode_FactorLU(fact);CHKERRQ(ierr); 1665 1666 b = (Mat_SeqAIJ*)(fact)->data; 1667 b->row = isrow; 1668 b->col = iscol; 1669 b->icol = isicol; 1670 ierr = PetscMalloc1(fact->rmap->n+1,&b->solve_work);CHKERRQ(ierr); 1671 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1672 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1673 PetscFunctionReturn(0); 1674 } 1675 1676 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1677 { 1678 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1679 IS isicol; 1680 PetscErrorCode ierr; 1681 const PetscInt *r,*ic; 1682 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1683 PetscInt *bi,*cols,nnz,*cols_lvl; 1684 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1685 PetscInt i,levels,diagonal_fill; 1686 PetscBool col_identity,row_identity,missing; 1687 PetscReal f; 1688 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 1689 PetscBT lnkbt; 1690 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1691 PetscFreeSpaceList free_space =NULL,current_space=NULL; 1692 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 1693 1694 PetscFunctionBegin; 1695 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1696 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1697 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1698 1699 levels = (PetscInt)info->levels; 1700 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1701 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1702 if (!levels && row_identity && col_identity) { 1703 /* special case: ilu(0) with natural ordering */ 1704 ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1705 if (a->inode.size) { 1706 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1707 } 1708 PetscFunctionReturn(0); 1709 } 1710 1711 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1712 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1713 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1714 1715 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1716 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 1717 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 1718 bi[0] = bdiag[0] = 0; 1719 ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr); 1720 1721 /* create a linked list for storing column indices of the active row */ 1722 nlnk = n + 1; 1723 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1724 1725 /* initial FreeSpace size is f*(ai[n]+1) */ 1726 f = info->fill; 1727 diagonal_fill = (PetscInt)info->diagonal_fill; 1728 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); 1729 current_space = free_space; 1730 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);CHKERRQ(ierr); 1731 current_space_lvl = free_space_lvl; 1732 for (i=0; i<n; i++) { 1733 nzi = 0; 1734 /* copy current row into linked list */ 1735 nnz = ai[r[i]+1] - ai[r[i]]; 1736 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1737 cols = aj + ai[r[i]]; 1738 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1739 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1740 nzi += nlnk; 1741 1742 /* make sure diagonal entry is included */ 1743 if (diagonal_fill && lnk[i] == -1) { 1744 fm = n; 1745 while (lnk[fm] < i) fm = lnk[fm]; 1746 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1747 lnk[fm] = i; 1748 lnk_lvl[i] = 0; 1749 nzi++; dcount++; 1750 } 1751 1752 /* add pivot rows into the active row */ 1753 nzbd = 0; 1754 prow = lnk[n]; 1755 while (prow < i) { 1756 nnz = bdiag[prow]; 1757 cols = bj_ptr[prow] + nnz + 1; 1758 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1759 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1760 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1761 nzi += nlnk; 1762 prow = lnk[prow]; 1763 nzbd++; 1764 } 1765 bdiag[i] = nzbd; 1766 bi[i+1] = bi[i] + nzi; 1767 /* if free space is not available, make more free space */ 1768 if (current_space->local_remaining<nzi) { 1769 nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(nzi,n - i)); /* estimated and max additional space needed */ 1770 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1771 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1772 reallocs++; 1773 } 1774 1775 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1776 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1777 bj_ptr[i] = current_space->array; 1778 bjlvl_ptr[i] = current_space_lvl->array; 1779 1780 /* make sure the active row i has diagonal entry */ 1781 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1782 1783 current_space->array += nzi; 1784 current_space->local_used += nzi; 1785 current_space->local_remaining -= nzi; 1786 current_space_lvl->array += nzi; 1787 current_space_lvl->local_used += nzi; 1788 current_space_lvl->local_remaining -= nzi; 1789 } 1790 1791 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1792 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1793 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1794 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 1795 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 1796 1797 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1798 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1799 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 1800 1801 #if defined(PETSC_USE_INFO) 1802 { 1803 PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1804 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 1805 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 1806 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr); 1807 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1808 if (diagonal_fill) { 1809 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr); 1810 } 1811 } 1812 #endif 1813 /* put together the new matrix */ 1814 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 1815 ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr); 1816 b = (Mat_SeqAIJ*)(fact)->data; 1817 1818 b->free_a = PETSC_TRUE; 1819 b->free_ij = PETSC_TRUE; 1820 b->singlemalloc = PETSC_FALSE; 1821 1822 ierr = PetscMalloc1(bdiag[0]+1,&b->a);CHKERRQ(ierr); 1823 1824 b->j = bj; 1825 b->i = bi; 1826 b->diag = bdiag; 1827 b->ilen = NULL; 1828 b->imax = NULL; 1829 b->row = isrow; 1830 b->col = iscol; 1831 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1832 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1833 b->icol = isicol; 1834 1835 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 1836 /* In b structure: Free imax, ilen, old a, old j. 1837 Allocate bdiag, solve_work, new a, new j */ 1838 ierr = PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1839 b->maxnz = b->nz = bdiag[0]+1; 1840 1841 (fact)->info.factor_mallocs = reallocs; 1842 (fact)->info.fill_ratio_given = f; 1843 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1844 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1845 if (a->inode.size) { 1846 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1847 } 1848 ierr = MatSeqAIJCheckInode_FactorLU(fact);CHKERRQ(ierr); 1849 PetscFunctionReturn(0); 1850 } 1851 1852 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1853 { 1854 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1855 IS isicol; 1856 PetscErrorCode ierr; 1857 const PetscInt *r,*ic; 1858 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1859 PetscInt *bi,*cols,nnz,*cols_lvl; 1860 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1861 PetscInt i,levels,diagonal_fill; 1862 PetscBool col_identity,row_identity; 1863 PetscReal f; 1864 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 1865 PetscBT lnkbt; 1866 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1867 PetscFreeSpaceList free_space =NULL,current_space=NULL; 1868 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 1869 PetscBool missing; 1870 1871 PetscFunctionBegin; 1872 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1873 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1874 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1875 1876 f = info->fill; 1877 levels = (PetscInt)info->levels; 1878 diagonal_fill = (PetscInt)info->diagonal_fill; 1879 1880 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1881 1882 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1883 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1884 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1885 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 1886 1887 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1888 if (a->inode.size) { 1889 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1890 } 1891 fact->factortype = MAT_FACTOR_ILU; 1892 (fact)->info.factor_mallocs = 0; 1893 (fact)->info.fill_ratio_given = info->fill; 1894 (fact)->info.fill_ratio_needed = 1.0; 1895 1896 b = (Mat_SeqAIJ*)(fact)->data; 1897 b->row = isrow; 1898 b->col = iscol; 1899 b->icol = isicol; 1900 ierr = PetscMalloc1((fact)->rmap->n+1,&b->solve_work);CHKERRQ(ierr); 1901 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1902 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1903 PetscFunctionReturn(0); 1904 } 1905 1906 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1907 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1908 1909 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1910 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 1911 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 1912 bi[0] = bdiag[0] = 0; 1913 1914 ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr); 1915 1916 /* create a linked list for storing column indices of the active row */ 1917 nlnk = n + 1; 1918 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1919 1920 /* initial FreeSpace size is f*(ai[n]+1) */ 1921 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); 1922 current_space = free_space; 1923 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);CHKERRQ(ierr); 1924 current_space_lvl = free_space_lvl; 1925 1926 for (i=0; i<n; i++) { 1927 nzi = 0; 1928 /* copy current row into linked list */ 1929 nnz = ai[r[i]+1] - ai[r[i]]; 1930 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1931 cols = aj + ai[r[i]]; 1932 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1933 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1934 nzi += nlnk; 1935 1936 /* make sure diagonal entry is included */ 1937 if (diagonal_fill && lnk[i] == -1) { 1938 fm = n; 1939 while (lnk[fm] < i) fm = lnk[fm]; 1940 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1941 lnk[fm] = i; 1942 lnk_lvl[i] = 0; 1943 nzi++; dcount++; 1944 } 1945 1946 /* add pivot rows into the active row */ 1947 nzbd = 0; 1948 prow = lnk[n]; 1949 while (prow < i) { 1950 nnz = bdiag[prow]; 1951 cols = bj_ptr[prow] + nnz + 1; 1952 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1953 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1954 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1955 nzi += nlnk; 1956 prow = lnk[prow]; 1957 nzbd++; 1958 } 1959 bdiag[i] = nzbd; 1960 bi[i+1] = bi[i] + nzi; 1961 1962 /* if free space is not available, make more free space */ 1963 if (current_space->local_remaining<nzi) { 1964 nnz = PetscIntMultTruncate(nzi,n - i); /* estimated and max additional space needed */ 1965 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1966 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1967 reallocs++; 1968 } 1969 1970 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1971 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1972 bj_ptr[i] = current_space->array; 1973 bjlvl_ptr[i] = current_space_lvl->array; 1974 1975 /* make sure the active row i has diagonal entry */ 1976 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1977 1978 current_space->array += nzi; 1979 current_space->local_used += nzi; 1980 current_space->local_remaining -= nzi; 1981 current_space_lvl->array += nzi; 1982 current_space_lvl->local_used += nzi; 1983 current_space_lvl->local_remaining -= nzi; 1984 } 1985 1986 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1987 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1988 1989 /* destroy list of free space and other temporary arrays */ 1990 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 1991 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */ 1992 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1993 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1994 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 1995 1996 #if defined(PETSC_USE_INFO) 1997 { 1998 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1999 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 2000 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2001 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr); 2002 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2003 if (diagonal_fill) { 2004 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr); 2005 } 2006 } 2007 #endif 2008 2009 /* put together the new matrix */ 2010 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 2011 ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr); 2012 b = (Mat_SeqAIJ*)(fact)->data; 2013 2014 b->free_a = PETSC_TRUE; 2015 b->free_ij = PETSC_TRUE; 2016 b->singlemalloc = PETSC_FALSE; 2017 2018 ierr = PetscMalloc1(bi[n],&b->a);CHKERRQ(ierr); 2019 b->j = bj; 2020 b->i = bi; 2021 for (i=0; i<n; i++) bdiag[i] += bi[i]; 2022 b->diag = bdiag; 2023 b->ilen = NULL; 2024 b->imax = NULL; 2025 b->row = isrow; 2026 b->col = iscol; 2027 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2028 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2029 b->icol = isicol; 2030 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 2031 /* In b structure: Free imax, ilen, old a, old j. 2032 Allocate bdiag, solve_work, new a, new j */ 2033 ierr = PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 2034 b->maxnz = b->nz = bi[n]; 2035 2036 (fact)->info.factor_mallocs = reallocs; 2037 (fact)->info.fill_ratio_given = f; 2038 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2039 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2040 if (a->inode.size) { 2041 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2042 } 2043 PetscFunctionReturn(0); 2044 } 2045 2046 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 2047 { 2048 Mat C = B; 2049 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2050 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2051 IS ip=b->row,iip = b->icol; 2052 PetscErrorCode ierr; 2053 const PetscInt *rip,*riip; 2054 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 2055 PetscInt *ai=a->i,*aj=a->j; 2056 PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 2057 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2058 PetscBool perm_identity; 2059 FactorShiftCtx sctx; 2060 PetscReal rs; 2061 MatScalar d,*v; 2062 2063 PetscFunctionBegin; 2064 /* MatPivotSetUp(): initialize shift context sctx */ 2065 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2066 2067 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2068 sctx.shift_top = info->zeropivot; 2069 for (i=0; i<mbs; i++) { 2070 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2071 d = (aa)[a->diag[i]]; 2072 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2073 v = aa+ai[i]; 2074 nz = ai[i+1] - ai[i]; 2075 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 2076 if (rs>sctx.shift_top) sctx.shift_top = rs; 2077 } 2078 sctx.shift_top *= 1.1; 2079 sctx.nshift_max = 5; 2080 sctx.shift_lo = 0.; 2081 sctx.shift_hi = 1.; 2082 } 2083 2084 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2085 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2086 2087 /* allocate working arrays 2088 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2089 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2090 */ 2091 ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);CHKERRQ(ierr); 2092 2093 do { 2094 sctx.newshift = PETSC_FALSE; 2095 2096 for (i=0; i<mbs; i++) c2r[i] = mbs; 2097 if (mbs) il[0] = 0; 2098 2099 for (k = 0; k<mbs; k++) { 2100 /* zero rtmp */ 2101 nz = bi[k+1] - bi[k]; 2102 bjtmp = bj + bi[k]; 2103 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2104 2105 /* load in initial unfactored row */ 2106 bval = ba + bi[k]; 2107 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2108 for (j = jmin; j < jmax; j++) { 2109 col = riip[aj[j]]; 2110 if (col >= k) { /* only take upper triangular entry */ 2111 rtmp[col] = aa[j]; 2112 *bval++ = 0.0; /* for in-place factorization */ 2113 } 2114 } 2115 /* shift the diagonal of the matrix: ZeropivotApply() */ 2116 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2117 2118 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2119 dk = rtmp[k]; 2120 i = c2r[k]; /* first row to be added to k_th row */ 2121 2122 while (i < k) { 2123 nexti = c2r[i]; /* next row to be added to k_th row */ 2124 2125 /* compute multiplier, update diag(k) and U(i,k) */ 2126 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2127 uikdi = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 2128 dk += uikdi*ba[ili]; /* update diag[k] */ 2129 ba[ili] = uikdi; /* -U(i,k) */ 2130 2131 /* add multiple of row i to k-th row */ 2132 jmin = ili + 1; jmax = bi[i+1]; 2133 if (jmin < jmax) { 2134 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2135 /* update il and c2r for row i */ 2136 il[i] = jmin; 2137 j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 2138 } 2139 i = nexti; 2140 } 2141 2142 /* copy data into U(k,:) */ 2143 rs = 0.0; 2144 jmin = bi[k]; jmax = bi[k+1]-1; 2145 if (jmin < jmax) { 2146 for (j=jmin; j<jmax; j++) { 2147 col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]); 2148 } 2149 /* add the k-th row into il and c2r */ 2150 il[k] = jmin; 2151 i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 2152 } 2153 2154 /* MatPivotCheck() */ 2155 sctx.rs = rs; 2156 sctx.pv = dk; 2157 ierr = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr); 2158 if (sctx.newshift) break; 2159 dk = sctx.pv; 2160 2161 ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 2162 } 2163 } while (sctx.newshift); 2164 2165 ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr); 2166 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2167 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2168 2169 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2170 if (perm_identity) { 2171 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2172 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2173 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2174 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2175 } else { 2176 B->ops->solve = MatSolve_SeqSBAIJ_1; 2177 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2178 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2179 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2180 } 2181 2182 C->assembled = PETSC_TRUE; 2183 C->preallocated = PETSC_TRUE; 2184 2185 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2186 2187 /* MatPivotView() */ 2188 if (sctx.nshift) { 2189 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2190 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 2191 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2192 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2193 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 2194 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr); 2195 } 2196 } 2197 PetscFunctionReturn(0); 2198 } 2199 2200 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 2201 { 2202 Mat C = B; 2203 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2204 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2205 IS ip=b->row,iip = b->icol; 2206 PetscErrorCode ierr; 2207 const PetscInt *rip,*riip; 2208 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp; 2209 PetscInt *ai=a->i,*aj=a->j; 2210 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 2211 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2212 PetscBool perm_identity; 2213 FactorShiftCtx sctx; 2214 PetscReal rs; 2215 MatScalar d,*v; 2216 2217 PetscFunctionBegin; 2218 /* MatPivotSetUp(): initialize shift context sctx */ 2219 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2220 2221 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2222 sctx.shift_top = info->zeropivot; 2223 for (i=0; i<mbs; i++) { 2224 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2225 d = (aa)[a->diag[i]]; 2226 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2227 v = aa+ai[i]; 2228 nz = ai[i+1] - ai[i]; 2229 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 2230 if (rs>sctx.shift_top) sctx.shift_top = rs; 2231 } 2232 sctx.shift_top *= 1.1; 2233 sctx.nshift_max = 5; 2234 sctx.shift_lo = 0.; 2235 sctx.shift_hi = 1.; 2236 } 2237 2238 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2239 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2240 2241 /* initialization */ 2242 ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);CHKERRQ(ierr); 2243 2244 do { 2245 sctx.newshift = PETSC_FALSE; 2246 2247 for (i=0; i<mbs; i++) jl[i] = mbs; 2248 il[0] = 0; 2249 2250 for (k = 0; k<mbs; k++) { 2251 /* zero rtmp */ 2252 nz = bi[k+1] - bi[k]; 2253 bjtmp = bj + bi[k]; 2254 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2255 2256 bval = ba + bi[k]; 2257 /* initialize k-th row by the perm[k]-th row of A */ 2258 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2259 for (j = jmin; j < jmax; j++) { 2260 col = riip[aj[j]]; 2261 if (col >= k) { /* only take upper triangular entry */ 2262 rtmp[col] = aa[j]; 2263 *bval++ = 0.0; /* for in-place factorization */ 2264 } 2265 } 2266 /* shift the diagonal of the matrix */ 2267 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2268 2269 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2270 dk = rtmp[k]; 2271 i = jl[k]; /* first row to be added to k_th row */ 2272 2273 while (i < k) { 2274 nexti = jl[i]; /* next row to be added to k_th row */ 2275 2276 /* compute multiplier, update diag(k) and U(i,k) */ 2277 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2278 uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */ 2279 dk += uikdi*ba[ili]; 2280 ba[ili] = uikdi; /* -U(i,k) */ 2281 2282 /* add multiple of row i to k-th row */ 2283 jmin = ili + 1; jmax = bi[i+1]; 2284 if (jmin < jmax) { 2285 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2286 /* update il and jl for row i */ 2287 il[i] = jmin; 2288 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 2289 } 2290 i = nexti; 2291 } 2292 2293 /* shift the diagonals when zero pivot is detected */ 2294 /* compute rs=sum of abs(off-diagonal) */ 2295 rs = 0.0; 2296 jmin = bi[k]+1; 2297 nz = bi[k+1] - jmin; 2298 bcol = bj + jmin; 2299 for (j=0; j<nz; j++) { 2300 rs += PetscAbsScalar(rtmp[bcol[j]]); 2301 } 2302 2303 sctx.rs = rs; 2304 sctx.pv = dk; 2305 ierr = MatPivotCheck(B,A,info,&sctx,k);CHKERRQ(ierr); 2306 if (sctx.newshift) break; 2307 dk = sctx.pv; 2308 2309 /* copy data into U(k,:) */ 2310 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 2311 jmin = bi[k]+1; jmax = bi[k+1]; 2312 if (jmin < jmax) { 2313 for (j=jmin; j<jmax; j++) { 2314 col = bj[j]; ba[j] = rtmp[col]; 2315 } 2316 /* add the k-th row into il and jl */ 2317 il[k] = jmin; 2318 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 2319 } 2320 } 2321 } while (sctx.newshift); 2322 2323 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 2324 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2325 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2326 2327 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2328 if (perm_identity) { 2329 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2330 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2331 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2332 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2333 } else { 2334 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2335 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2336 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2337 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2338 } 2339 2340 C->assembled = PETSC_TRUE; 2341 C->preallocated = PETSC_TRUE; 2342 2343 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2344 if (sctx.nshift) { 2345 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2346 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2347 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2348 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2349 } 2350 } 2351 PetscFunctionReturn(0); 2352 } 2353 2354 /* 2355 icc() under revised new data structure. 2356 Factored arrays bj and ba are stored as 2357 U(0,:),...,U(i,:),U(n-1,:) 2358 2359 ui=fact->i is an array of size n+1, in which 2360 ui+ 2361 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2362 ui[n]: points to U(n-1,n-1)+1 2363 2364 udiag=fact->diag is an array of size n,in which 2365 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2366 2367 U(i,:) contains udiag[i] as its last entry, i.e., 2368 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2369 */ 2370 2371 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2372 { 2373 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2374 Mat_SeqSBAIJ *b; 2375 PetscErrorCode ierr; 2376 PetscBool perm_identity,missing; 2377 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2378 const PetscInt *rip,*riip; 2379 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2380 PetscInt nlnk,*lnk,*lnk_lvl=NULL,d; 2381 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2382 PetscReal fill =info->fill,levels=info->levels; 2383 PetscFreeSpaceList free_space =NULL,current_space=NULL; 2384 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 2385 PetscBT lnkbt; 2386 IS iperm; 2387 2388 PetscFunctionBegin; 2389 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2390 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2391 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2392 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2393 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2394 2395 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 2396 ierr = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr); 2397 ui[0] = 0; 2398 2399 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2400 if (!levels && perm_identity) { 2401 for (i=0; i<am; i++) { 2402 ncols = ai[i+1] - a->diag[i]; 2403 ui[i+1] = ui[i] + ncols; 2404 udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 2405 } 2406 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2407 cols = uj; 2408 for (i=0; i<am; i++) { 2409 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2410 ncols = ai[i+1] - a->diag[i] -1; 2411 for (j=0; j<ncols; j++) *cols++ = aj[j]; 2412 *cols++ = i; /* diagonal is located as the last entry of U(i,:) */ 2413 } 2414 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2415 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2416 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2417 2418 /* initialization */ 2419 ierr = PetscMalloc1(am+1,&ajtmp);CHKERRQ(ierr); 2420 2421 /* jl: linked list for storing indices of the pivot rows 2422 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2423 ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr); 2424 for (i=0; i<am; i++) { 2425 jl[i] = am; il[i] = 0; 2426 } 2427 2428 /* create and initialize a linked list for storing column indices of the active row k */ 2429 nlnk = am + 1; 2430 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2431 2432 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2433 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2434 current_space = free_space; 2435 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr); 2436 current_space_lvl = free_space_lvl; 2437 2438 for (k=0; k<am; k++) { /* for each active row k */ 2439 /* initialize lnk by the column indices of row rip[k] of A */ 2440 nzk = 0; 2441 ncols = ai[rip[k]+1] - ai[rip[k]]; 2442 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2443 ncols_upper = 0; 2444 for (j=0; j<ncols; j++) { 2445 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2446 if (riip[i] >= k) { /* only take upper triangular entry */ 2447 ajtmp[ncols_upper] = i; 2448 ncols_upper++; 2449 } 2450 } 2451 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2452 nzk += nlnk; 2453 2454 /* update lnk by computing fill-in for each pivot row to be merged in */ 2455 prow = jl[k]; /* 1st pivot row */ 2456 2457 while (prow < k) { 2458 nextprow = jl[prow]; 2459 2460 /* merge prow into k-th row */ 2461 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2462 jmax = ui[prow+1]; 2463 ncols = jmax-jmin; 2464 i = jmin - ui[prow]; 2465 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2466 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2467 j = *(uj - 1); 2468 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2469 nzk += nlnk; 2470 2471 /* update il and jl for prow */ 2472 if (jmin < jmax) { 2473 il[prow] = jmin; 2474 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2475 } 2476 prow = nextprow; 2477 } 2478 2479 /* if free space is not available, make more free space */ 2480 if (current_space->local_remaining<nzk) { 2481 i = am - k + 1; /* num of unfactored rows */ 2482 i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2483 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2484 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2485 reallocs++; 2486 } 2487 2488 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2489 if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2490 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2491 2492 /* add the k-th row into il and jl */ 2493 if (nzk > 1) { 2494 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2495 jl[k] = jl[i]; jl[i] = k; 2496 il[k] = ui[k] + 1; 2497 } 2498 uj_ptr[k] = current_space->array; 2499 uj_lvl_ptr[k] = current_space_lvl->array; 2500 2501 current_space->array += nzk; 2502 current_space->local_used += nzk; 2503 current_space->local_remaining -= nzk; 2504 2505 current_space_lvl->array += nzk; 2506 current_space_lvl->local_used += nzk; 2507 current_space_lvl->local_remaining -= nzk; 2508 2509 ui[k+1] = ui[k] + nzk; 2510 } 2511 2512 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2513 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2514 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2515 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2516 2517 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2518 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2519 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2520 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2521 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2522 2523 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2524 2525 /* put together the new matrix in MATSEQSBAIJ format */ 2526 b = (Mat_SeqSBAIJ*)(fact)->data; 2527 b->singlemalloc = PETSC_FALSE; 2528 2529 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 2530 2531 b->j = uj; 2532 b->i = ui; 2533 b->diag = udiag; 2534 b->free_diag = PETSC_TRUE; 2535 b->ilen = NULL; 2536 b->imax = NULL; 2537 b->row = perm; 2538 b->col = perm; 2539 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2540 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2541 b->icol = iperm; 2542 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2543 2544 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 2545 ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2546 2547 b->maxnz = b->nz = ui[am]; 2548 b->free_a = PETSC_TRUE; 2549 b->free_ij = PETSC_TRUE; 2550 2551 fact->info.factor_mallocs = reallocs; 2552 fact->info.fill_ratio_given = fill; 2553 if (ai[am] != 0) { 2554 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2555 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2556 } else { 2557 fact->info.fill_ratio_needed = 0.0; 2558 } 2559 #if defined(PETSC_USE_INFO) 2560 if (ai[am] != 0) { 2561 PetscReal af = fact->info.fill_ratio_needed; 2562 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2563 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2564 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2565 } else { 2566 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 2567 } 2568 #endif 2569 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2570 PetscFunctionReturn(0); 2571 } 2572 2573 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2574 { 2575 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2576 Mat_SeqSBAIJ *b; 2577 PetscErrorCode ierr; 2578 PetscBool perm_identity,missing; 2579 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2580 const PetscInt *rip,*riip; 2581 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2582 PetscInt nlnk,*lnk,*lnk_lvl=NULL,d; 2583 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2584 PetscReal fill =info->fill,levels=info->levels; 2585 PetscFreeSpaceList free_space =NULL,current_space=NULL; 2586 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 2587 PetscBT lnkbt; 2588 IS iperm; 2589 2590 PetscFunctionBegin; 2591 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2592 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2593 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2594 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2595 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2596 2597 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 2598 ierr = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr); 2599 ui[0] = 0; 2600 2601 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2602 if (!levels && perm_identity) { 2603 2604 for (i=0; i<am; i++) { 2605 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 2606 udiag[i] = ui[i]; 2607 } 2608 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2609 cols = uj; 2610 for (i=0; i<am; i++) { 2611 aj = a->j + a->diag[i]; 2612 ncols = ui[i+1] - ui[i]; 2613 for (j=0; j<ncols; j++) *cols++ = *aj++; 2614 } 2615 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2616 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2617 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2618 2619 /* initialization */ 2620 ierr = PetscMalloc1(am+1,&ajtmp);CHKERRQ(ierr); 2621 2622 /* jl: linked list for storing indices of the pivot rows 2623 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2624 ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr); 2625 for (i=0; i<am; i++) { 2626 jl[i] = am; il[i] = 0; 2627 } 2628 2629 /* create and initialize a linked list for storing column indices of the active row k */ 2630 nlnk = am + 1; 2631 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2632 2633 /* initial FreeSpace size is fill*(ai[am]+1) */ 2634 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);CHKERRQ(ierr); 2635 current_space = free_space; 2636 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space_lvl);CHKERRQ(ierr); 2637 current_space_lvl = free_space_lvl; 2638 2639 for (k=0; k<am; k++) { /* for each active row k */ 2640 /* initialize lnk by the column indices of row rip[k] of A */ 2641 nzk = 0; 2642 ncols = ai[rip[k]+1] - ai[rip[k]]; 2643 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2644 ncols_upper = 0; 2645 for (j=0; j<ncols; j++) { 2646 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2647 if (riip[i] >= k) { /* only take upper triangular entry */ 2648 ajtmp[ncols_upper] = i; 2649 ncols_upper++; 2650 } 2651 } 2652 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2653 nzk += nlnk; 2654 2655 /* update lnk by computing fill-in for each pivot row to be merged in */ 2656 prow = jl[k]; /* 1st pivot row */ 2657 2658 while (prow < k) { 2659 nextprow = jl[prow]; 2660 2661 /* merge prow into k-th row */ 2662 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2663 jmax = ui[prow+1]; 2664 ncols = jmax-jmin; 2665 i = jmin - ui[prow]; 2666 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2667 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2668 j = *(uj - 1); 2669 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2670 nzk += nlnk; 2671 2672 /* update il and jl for prow */ 2673 if (jmin < jmax) { 2674 il[prow] = jmin; 2675 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2676 } 2677 prow = nextprow; 2678 } 2679 2680 /* if free space is not available, make more free space */ 2681 if (current_space->local_remaining<nzk) { 2682 i = am - k + 1; /* num of unfactored rows */ 2683 i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2684 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2685 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2686 reallocs++; 2687 } 2688 2689 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2690 if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2691 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2692 2693 /* add the k-th row into il and jl */ 2694 if (nzk > 1) { 2695 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2696 jl[k] = jl[i]; jl[i] = k; 2697 il[k] = ui[k] + 1; 2698 } 2699 uj_ptr[k] = current_space->array; 2700 uj_lvl_ptr[k] = current_space_lvl->array; 2701 2702 current_space->array += nzk; 2703 current_space->local_used += nzk; 2704 current_space->local_remaining -= nzk; 2705 2706 current_space_lvl->array += nzk; 2707 current_space_lvl->local_used += nzk; 2708 current_space_lvl->local_remaining -= nzk; 2709 2710 ui[k+1] = ui[k] + nzk; 2711 } 2712 2713 #if defined(PETSC_USE_INFO) 2714 if (ai[am] != 0) { 2715 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2716 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2717 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2718 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2719 } else { 2720 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 2721 } 2722 #endif 2723 2724 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2725 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2726 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2727 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2728 2729 /* destroy list of free space and other temporary array(s) */ 2730 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2731 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2732 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2733 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2734 2735 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2736 2737 /* put together the new matrix in MATSEQSBAIJ format */ 2738 2739 b = (Mat_SeqSBAIJ*)fact->data; 2740 b->singlemalloc = PETSC_FALSE; 2741 2742 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 2743 2744 b->j = uj; 2745 b->i = ui; 2746 b->diag = udiag; 2747 b->free_diag = PETSC_TRUE; 2748 b->ilen = NULL; 2749 b->imax = NULL; 2750 b->row = perm; 2751 b->col = perm; 2752 2753 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2754 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2755 2756 b->icol = iperm; 2757 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2758 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 2759 ierr = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2760 b->maxnz = b->nz = ui[am]; 2761 b->free_a = PETSC_TRUE; 2762 b->free_ij = PETSC_TRUE; 2763 2764 fact->info.factor_mallocs = reallocs; 2765 fact->info.fill_ratio_given = fill; 2766 if (ai[am] != 0) { 2767 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2768 } else { 2769 fact->info.fill_ratio_needed = 0.0; 2770 } 2771 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2772 PetscFunctionReturn(0); 2773 } 2774 2775 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2776 { 2777 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2778 Mat_SeqSBAIJ *b; 2779 PetscErrorCode ierr; 2780 PetscBool perm_identity,missing; 2781 PetscReal fill = info->fill; 2782 const PetscInt *rip,*riip; 2783 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2784 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2785 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag; 2786 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2787 PetscBT lnkbt; 2788 IS iperm; 2789 2790 PetscFunctionBegin; 2791 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2792 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 2793 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 2794 2795 /* check whether perm is the identity mapping */ 2796 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2797 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2798 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2799 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2800 2801 /* initialization */ 2802 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 2803 ierr = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr); 2804 ui[0] = 0; 2805 2806 /* jl: linked list for storing indices of the pivot rows 2807 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2808 ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr); 2809 for (i=0; i<am; i++) { 2810 jl[i] = am; il[i] = 0; 2811 } 2812 2813 /* create and initialize a linked list for storing column indices of the active row k */ 2814 nlnk = am + 1; 2815 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2816 2817 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2818 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2819 current_space = free_space; 2820 2821 for (k=0; k<am; k++) { /* for each active row k */ 2822 /* initialize lnk by the column indices of row rip[k] of A */ 2823 nzk = 0; 2824 ncols = ai[rip[k]+1] - ai[rip[k]]; 2825 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2826 ncols_upper = 0; 2827 for (j=0; j<ncols; j++) { 2828 i = riip[*(aj + ai[rip[k]] + j)]; 2829 if (i >= k) { /* only take upper triangular entry */ 2830 cols[ncols_upper] = i; 2831 ncols_upper++; 2832 } 2833 } 2834 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2835 nzk += nlnk; 2836 2837 /* update lnk by computing fill-in for each pivot row to be merged in */ 2838 prow = jl[k]; /* 1st pivot row */ 2839 2840 while (prow < k) { 2841 nextprow = jl[prow]; 2842 /* merge prow into k-th row */ 2843 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2844 jmax = ui[prow+1]; 2845 ncols = jmax-jmin; 2846 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2847 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2848 nzk += nlnk; 2849 2850 /* update il and jl for prow */ 2851 if (jmin < jmax) { 2852 il[prow] = jmin; 2853 j = *uj_ptr; 2854 jl[prow] = jl[j]; 2855 jl[j] = prow; 2856 } 2857 prow = nextprow; 2858 } 2859 2860 /* if free space is not available, make more free space */ 2861 if (current_space->local_remaining<nzk) { 2862 i = am - k + 1; /* num of unfactored rows */ 2863 i = PetscIntMultTruncate(i,PetscMin(nzk,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2864 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2865 reallocs++; 2866 } 2867 2868 /* copy data into free space, then initialize lnk */ 2869 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 2870 2871 /* add the k-th row into il and jl */ 2872 if (nzk > 1) { 2873 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2874 jl[k] = jl[i]; jl[i] = k; 2875 il[k] = ui[k] + 1; 2876 } 2877 ui_ptr[k] = current_space->array; 2878 2879 current_space->array += nzk; 2880 current_space->local_used += nzk; 2881 current_space->local_remaining -= nzk; 2882 2883 ui[k+1] = ui[k] + nzk; 2884 } 2885 2886 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2887 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2888 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 2889 2890 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2891 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 2892 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2893 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2894 2895 /* put together the new matrix in MATSEQSBAIJ format */ 2896 2897 b = (Mat_SeqSBAIJ*)fact->data; 2898 b->singlemalloc = PETSC_FALSE; 2899 b->free_a = PETSC_TRUE; 2900 b->free_ij = PETSC_TRUE; 2901 2902 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 2903 2904 b->j = uj; 2905 b->i = ui; 2906 b->diag = udiag; 2907 b->free_diag = PETSC_TRUE; 2908 b->ilen = NULL; 2909 b->imax = NULL; 2910 b->row = perm; 2911 b->col = perm; 2912 2913 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2914 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2915 2916 b->icol = iperm; 2917 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2918 2919 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 2920 ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2921 2922 b->maxnz = b->nz = ui[am]; 2923 2924 fact->info.factor_mallocs = reallocs; 2925 fact->info.fill_ratio_given = fill; 2926 if (ai[am] != 0) { 2927 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2928 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2929 } else { 2930 fact->info.fill_ratio_needed = 0.0; 2931 } 2932 #if defined(PETSC_USE_INFO) 2933 if (ai[am] != 0) { 2934 PetscReal af = fact->info.fill_ratio_needed; 2935 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2936 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2937 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2938 } else { 2939 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 2940 } 2941 #endif 2942 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2943 PetscFunctionReturn(0); 2944 } 2945 2946 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2947 { 2948 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2949 Mat_SeqSBAIJ *b; 2950 PetscErrorCode ierr; 2951 PetscBool perm_identity,missing; 2952 PetscReal fill = info->fill; 2953 const PetscInt *rip,*riip; 2954 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2955 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2956 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 2957 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2958 PetscBT lnkbt; 2959 IS iperm; 2960 2961 PetscFunctionBegin; 2962 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2963 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 2964 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 2965 2966 /* check whether perm is the identity mapping */ 2967 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2968 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2969 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2970 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2971 2972 /* initialization */ 2973 ierr = PetscMalloc1(am+1,&ui);CHKERRQ(ierr); 2974 ui[0] = 0; 2975 2976 /* jl: linked list for storing indices of the pivot rows 2977 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2978 ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr); 2979 for (i=0; i<am; i++) { 2980 jl[i] = am; il[i] = 0; 2981 } 2982 2983 /* create and initialize a linked list for storing column indices of the active row k */ 2984 nlnk = am + 1; 2985 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2986 2987 /* initial FreeSpace size is fill*(ai[am]+1) */ 2988 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);CHKERRQ(ierr); 2989 current_space = free_space; 2990 2991 for (k=0; k<am; k++) { /* for each active row k */ 2992 /* initialize lnk by the column indices of row rip[k] of A */ 2993 nzk = 0; 2994 ncols = ai[rip[k]+1] - ai[rip[k]]; 2995 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2996 ncols_upper = 0; 2997 for (j=0; j<ncols; j++) { 2998 i = riip[*(aj + ai[rip[k]] + j)]; 2999 if (i >= k) { /* only take upper triangular entry */ 3000 cols[ncols_upper] = i; 3001 ncols_upper++; 3002 } 3003 } 3004 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3005 nzk += nlnk; 3006 3007 /* update lnk by computing fill-in for each pivot row to be merged in */ 3008 prow = jl[k]; /* 1st pivot row */ 3009 3010 while (prow < k) { 3011 nextprow = jl[prow]; 3012 /* merge prow into k-th row */ 3013 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3014 jmax = ui[prow+1]; 3015 ncols = jmax-jmin; 3016 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3017 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3018 nzk += nlnk; 3019 3020 /* update il and jl for prow */ 3021 if (jmin < jmax) { 3022 il[prow] = jmin; 3023 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 3024 } 3025 prow = nextprow; 3026 } 3027 3028 /* if free space is not available, make more free space */ 3029 if (current_space->local_remaining<nzk) { 3030 i = am - k + 1; /* num of unfactored rows */ 3031 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3032 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 3033 reallocs++; 3034 } 3035 3036 /* copy data into free space, then initialize lnk */ 3037 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3038 3039 /* add the k-th row into il and jl */ 3040 if (nzk-1 > 0) { 3041 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3042 jl[k] = jl[i]; jl[i] = k; 3043 il[k] = ui[k] + 1; 3044 } 3045 ui_ptr[k] = current_space->array; 3046 3047 current_space->array += nzk; 3048 current_space->local_used += nzk; 3049 current_space->local_remaining -= nzk; 3050 3051 ui[k+1] = ui[k] + nzk; 3052 } 3053 3054 #if defined(PETSC_USE_INFO) 3055 if (ai[am] != 0) { 3056 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 3057 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 3058 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 3059 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 3060 } else { 3061 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 3062 } 3063 #endif 3064 3065 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 3066 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 3067 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 3068 3069 /* destroy list of free space and other temporary array(s) */ 3070 ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr); 3071 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 3072 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3073 3074 /* put together the new matrix in MATSEQSBAIJ format */ 3075 3076 b = (Mat_SeqSBAIJ*)fact->data; 3077 b->singlemalloc = PETSC_FALSE; 3078 b->free_a = PETSC_TRUE; 3079 b->free_ij = PETSC_TRUE; 3080 3081 ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr); 3082 3083 b->j = uj; 3084 b->i = ui; 3085 b->diag = NULL; 3086 b->ilen = NULL; 3087 b->imax = NULL; 3088 b->row = perm; 3089 b->col = perm; 3090 3091 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3092 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3093 3094 b->icol = iperm; 3095 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3096 3097 ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr); 3098 ierr = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3099 b->maxnz = b->nz = ui[am]; 3100 3101 fact->info.factor_mallocs = reallocs; 3102 fact->info.fill_ratio_given = fill; 3103 if (ai[am] != 0) { 3104 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 3105 } else { 3106 fact->info.fill_ratio_needed = 0.0; 3107 } 3108 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3109 PetscFunctionReturn(0); 3110 } 3111 3112 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 3113 { 3114 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3115 PetscErrorCode ierr; 3116 PetscInt n = A->rmap->n; 3117 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 3118 PetscScalar *x,sum; 3119 const PetscScalar *b; 3120 const MatScalar *aa = a->a,*v; 3121 PetscInt i,nz; 3122 3123 PetscFunctionBegin; 3124 if (!n) PetscFunctionReturn(0); 3125 3126 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3127 ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr); 3128 3129 /* forward solve the lower triangular */ 3130 x[0] = b[0]; 3131 v = aa; 3132 vi = aj; 3133 for (i=1; i<n; i++) { 3134 nz = ai[i+1] - ai[i]; 3135 sum = b[i]; 3136 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3137 v += nz; 3138 vi += nz; 3139 x[i] = sum; 3140 } 3141 3142 /* backward solve the upper triangular */ 3143 for (i=n-1; i>=0; i--) { 3144 v = aa + adiag[i+1] + 1; 3145 vi = aj + adiag[i+1] + 1; 3146 nz = adiag[i] - adiag[i+1]-1; 3147 sum = x[i]; 3148 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3149 x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3150 } 3151 3152 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 3153 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3154 ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr); 3155 PetscFunctionReturn(0); 3156 } 3157 3158 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 3159 { 3160 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3161 IS iscol = a->col,isrow = a->row; 3162 PetscErrorCode ierr; 3163 PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz; 3164 const PetscInt *rout,*cout,*r,*c; 3165 PetscScalar *x,*tmp,sum; 3166 const PetscScalar *b; 3167 const MatScalar *aa = a->a,*v; 3168 3169 PetscFunctionBegin; 3170 if (!n) PetscFunctionReturn(0); 3171 3172 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3173 ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr); 3174 tmp = a->solve_work; 3175 3176 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 3177 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 3178 3179 /* forward solve the lower triangular */ 3180 tmp[0] = b[r[0]]; 3181 v = aa; 3182 vi = aj; 3183 for (i=1; i<n; i++) { 3184 nz = ai[i+1] - ai[i]; 3185 sum = b[r[i]]; 3186 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3187 tmp[i] = sum; 3188 v += nz; vi += nz; 3189 } 3190 3191 /* backward solve the upper triangular */ 3192 for (i=n-1; i>=0; i--) { 3193 v = aa + adiag[i+1]+1; 3194 vi = aj + adiag[i+1]+1; 3195 nz = adiag[i]-adiag[i+1]-1; 3196 sum = tmp[i]; 3197 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3198 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 3199 } 3200 3201 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 3202 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 3203 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3204 ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr); 3205 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 3206 PetscFunctionReturn(0); 3207 } 3208 3209 /* 3210 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3211 */ 3212 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 3213 { 3214 Mat B = *fact; 3215 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 3216 IS isicol; 3217 PetscErrorCode ierr; 3218 const PetscInt *r,*ic; 3219 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 3220 PetscInt *bi,*bj,*bdiag,*bdiag_rev; 3221 PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au; 3222 PetscInt nlnk,*lnk; 3223 PetscBT lnkbt; 3224 PetscBool row_identity,icol_identity; 3225 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp; 3226 const PetscInt *ics; 3227 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 3228 PetscReal dt =info->dt,shift=info->shiftamount; 3229 PetscInt dtcount=(PetscInt)info->dtcount,nnz_max; 3230 PetscBool missing; 3231 3232 PetscFunctionBegin; 3233 if (dt == PETSC_DEFAULT) dt = 0.005; 3234 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); 3235 3236 /* ------- symbolic factorization, can be reused ---------*/ 3237 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 3238 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 3239 adiag=a->diag; 3240 3241 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 3242 3243 /* bdiag is location of diagonal in factor */ 3244 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); /* becomes b->diag */ 3245 ierr = PetscMalloc1(n+1,&bdiag_rev);CHKERRQ(ierr); /* temporary */ 3246 3247 /* allocate row pointers bi */ 3248 ierr = PetscMalloc1(2*n+2,&bi);CHKERRQ(ierr); 3249 3250 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3251 if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */ 3252 nnz_max = ai[n]+2*n*dtcount+2; 3253 3254 ierr = PetscMalloc1(nnz_max+1,&bj);CHKERRQ(ierr); 3255 ierr = PetscMalloc1(nnz_max+1,&ba);CHKERRQ(ierr); 3256 3257 /* put together the new matrix */ 3258 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 3259 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 3260 b = (Mat_SeqAIJ*)B->data; 3261 3262 b->free_a = PETSC_TRUE; 3263 b->free_ij = PETSC_TRUE; 3264 b->singlemalloc = PETSC_FALSE; 3265 3266 b->a = ba; 3267 b->j = bj; 3268 b->i = bi; 3269 b->diag = bdiag; 3270 b->ilen = NULL; 3271 b->imax = NULL; 3272 b->row = isrow; 3273 b->col = iscol; 3274 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3275 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3276 b->icol = isicol; 3277 3278 ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr); 3279 ierr = PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3280 b->maxnz = nnz_max; 3281 3282 B->factortype = MAT_FACTOR_ILUDT; 3283 B->info.factor_mallocs = 0; 3284 B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 3285 /* ------- end of symbolic factorization ---------*/ 3286 3287 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3288 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3289 ics = ic; 3290 3291 /* linked list for storing column indices of the active row */ 3292 nlnk = n + 1; 3293 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3294 3295 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3296 ierr = PetscMalloc2(n,&im,n,&jtmp);CHKERRQ(ierr); 3297 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3298 ierr = PetscMalloc2(n,&rtmp,n,&vtmp);CHKERRQ(ierr); 3299 ierr = PetscArrayzero(rtmp,n);CHKERRQ(ierr); 3300 3301 bi[0] = 0; 3302 bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */ 3303 bdiag_rev[n] = bdiag[0]; 3304 bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */ 3305 for (i=0; i<n; i++) { 3306 /* copy initial fill into linked list */ 3307 nzi = ai[r[i]+1] - ai[r[i]]; 3308 if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 3309 nzi_al = adiag[r[i]] - ai[r[i]]; 3310 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 3311 ajtmp = aj + ai[r[i]]; 3312 ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3313 3314 /* load in initial (unfactored row) */ 3315 aatmp = a->a + ai[r[i]]; 3316 for (j=0; j<nzi; j++) { 3317 rtmp[ics[*ajtmp++]] = *aatmp++; 3318 } 3319 3320 /* add pivot rows into linked list */ 3321 row = lnk[n]; 3322 while (row < i) { 3323 nzi_bl = bi[row+1] - bi[row] + 1; 3324 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 3325 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 3326 nzi += nlnk; 3327 row = lnk[row]; 3328 } 3329 3330 /* copy data from lnk into jtmp, then initialize lnk */ 3331 ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 3332 3333 /* numerical factorization */ 3334 bjtmp = jtmp; 3335 row = *bjtmp++; /* 1st pivot row */ 3336 while (row < i) { 3337 pc = rtmp + row; 3338 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3339 multiplier = (*pc) * (*pv); 3340 *pc = multiplier; 3341 if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 3342 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3343 pv = ba + bdiag[row+1] + 1; 3344 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3345 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3346 ierr = PetscLogFlops(1+2.0*nz);CHKERRQ(ierr); 3347 } 3348 row = *bjtmp++; 3349 } 3350 3351 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3352 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3353 nzi_bl = 0; j = 0; 3354 while (jtmp[j] < i) { /* Note: jtmp is sorted */ 3355 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3356 nzi_bl++; j++; 3357 } 3358 nzi_bu = nzi - nzi_bl -1; 3359 while (j < nzi) { 3360 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3361 j++; 3362 } 3363 3364 bjtmp = bj + bi[i]; 3365 batmp = ba + bi[i]; 3366 /* apply level dropping rule to L part */ 3367 ncut = nzi_al + dtcount; 3368 if (ncut < nzi_bl) { 3369 ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 3370 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 3371 } else { 3372 ncut = nzi_bl; 3373 } 3374 for (j=0; j<ncut; j++) { 3375 bjtmp[j] = jtmp[j]; 3376 batmp[j] = vtmp[j]; 3377 } 3378 bi[i+1] = bi[i] + ncut; 3379 nzi = ncut + 1; 3380 3381 /* apply level dropping rule to U part */ 3382 ncut = nzi_au + dtcount; 3383 if (ncut < nzi_bu) { 3384 ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 3385 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 3386 } else { 3387 ncut = nzi_bu; 3388 } 3389 nzi += ncut; 3390 3391 /* mark bdiagonal */ 3392 bdiag[i+1] = bdiag[i] - (ncut + 1); 3393 bdiag_rev[n-i-1] = bdiag[i+1]; 3394 bi[2*n - i] = bi[2*n - i +1] - (ncut + 1); 3395 bjtmp = bj + bdiag[i]; 3396 batmp = ba + bdiag[i]; 3397 *bjtmp = i; 3398 *batmp = diag_tmp; /* rtmp[i]; */ 3399 if (*batmp == 0.0) { 3400 *batmp = dt+shift; 3401 } 3402 *batmp = 1.0/(*batmp); /* invert diagonal entries for simpler triangular solves */ 3403 3404 bjtmp = bj + bdiag[i+1]+1; 3405 batmp = ba + bdiag[i+1]+1; 3406 for (k=0; k<ncut; k++) { 3407 bjtmp[k] = jtmp[nzi_bl+1+k]; 3408 batmp[k] = vtmp[nzi_bl+1+k]; 3409 } 3410 3411 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3412 } /* for (i=0; i<n; i++) */ 3413 if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 3414 3415 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3416 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3417 3418 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3419 ierr = PetscFree2(im,jtmp);CHKERRQ(ierr); 3420 ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr); 3421 ierr = PetscFree(bdiag_rev);CHKERRQ(ierr); 3422 3423 ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 3424 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3425 3426 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3427 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 3428 if (row_identity && icol_identity) { 3429 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3430 } else { 3431 B->ops->solve = MatSolve_SeqAIJ; 3432 } 3433 3434 B->ops->solveadd = NULL; 3435 B->ops->solvetranspose = NULL; 3436 B->ops->solvetransposeadd = NULL; 3437 B->ops->matsolve = NULL; 3438 B->assembled = PETSC_TRUE; 3439 B->preallocated = PETSC_TRUE; 3440 PetscFunctionReturn(0); 3441 } 3442 3443 /* a wraper of MatILUDTFactor_SeqAIJ() */ 3444 /* 3445 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3446 */ 3447 3448 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 3449 { 3450 PetscErrorCode ierr; 3451 3452 PetscFunctionBegin; 3453 ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr); 3454 PetscFunctionReturn(0); 3455 } 3456 3457 /* 3458 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3459 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3460 */ 3461 /* 3462 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors 3463 */ 3464 3465 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 3466 { 3467 Mat C =fact; 3468 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 3469 IS isrow = b->row,isicol = b->icol; 3470 PetscErrorCode ierr; 3471 const PetscInt *r,*ic,*ics; 3472 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 3473 PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 3474 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 3475 PetscReal dt=info->dt,shift=info->shiftamount; 3476 PetscBool row_identity, col_identity; 3477 3478 PetscFunctionBegin; 3479 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3480 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3481 ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr); 3482 ics = ic; 3483 3484 for (i=0; i<n; i++) { 3485 /* initialize rtmp array */ 3486 nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 3487 bjtmp = bj + bi[i]; 3488 for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 3489 rtmp[i] = 0.0; 3490 nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 3491 bjtmp = bj + bdiag[i+1] + 1; 3492 for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 3493 3494 /* load in initial unfactored row of A */ 3495 nz = ai[r[i]+1] - ai[r[i]]; 3496 ajtmp = aj + ai[r[i]]; 3497 v = aa + ai[r[i]]; 3498 for (j=0; j<nz; j++) { 3499 rtmp[ics[*ajtmp++]] = v[j]; 3500 } 3501 3502 /* numerical factorization */ 3503 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3504 nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 3505 k = 0; 3506 while (k < nzl) { 3507 row = *bjtmp++; 3508 pc = rtmp + row; 3509 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3510 multiplier = (*pc) * (*pv); 3511 *pc = multiplier; 3512 if (PetscAbsScalar(multiplier) > dt) { 3513 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3514 pv = b->a + bdiag[row+1] + 1; 3515 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3516 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3517 ierr = PetscLogFlops(1+2.0*nz);CHKERRQ(ierr); 3518 } 3519 k++; 3520 } 3521 3522 /* finished row so stick it into b->a */ 3523 /* L-part */ 3524 pv = b->a + bi[i]; 3525 pj = bj + bi[i]; 3526 nzl = bi[i+1] - bi[i]; 3527 for (j=0; j<nzl; j++) { 3528 pv[j] = rtmp[pj[j]]; 3529 } 3530 3531 /* diagonal: invert diagonal entries for simpler triangular solves */ 3532 if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 3533 b->a[bdiag[i]] = 1.0/rtmp[i]; 3534 3535 /* U-part */ 3536 pv = b->a + bdiag[i+1] + 1; 3537 pj = bj + bdiag[i+1] + 1; 3538 nzu = bdiag[i] - bdiag[i+1] - 1; 3539 for (j=0; j<nzu; j++) { 3540 pv[j] = rtmp[pj[j]]; 3541 } 3542 } 3543 3544 ierr = PetscFree(rtmp);CHKERRQ(ierr); 3545 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3546 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3547 3548 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3549 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 3550 if (row_identity && col_identity) { 3551 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3552 } else { 3553 C->ops->solve = MatSolve_SeqAIJ; 3554 } 3555 C->ops->solveadd = NULL; 3556 C->ops->solvetranspose = NULL; 3557 C->ops->solvetransposeadd = NULL; 3558 C->ops->matsolve = NULL; 3559 C->assembled = PETSC_TRUE; 3560 C->preallocated = PETSC_TRUE; 3561 3562 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 3563 PetscFunctionReturn(0); 3564 } 3565