1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscbt.h> 5 #include <../src/mat/utils/freespace.h> 6 7 EXTERN_C_BEGIN 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ" 10 /* 11 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 12 13 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 14 */ 15 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 16 { 17 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data; 18 PetscErrorCode ierr; 19 PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order; 20 const PetscInt *ai = a->i, *aj = a->j; 21 const PetscScalar *aa = a->a; 22 PetscBool *done; 23 PetscReal best,past = 0,future; 24 25 PetscFunctionBegin; 26 /* pick initial row */ 27 best = -1; 28 for (i=0; i<n; i++) { 29 future = 0.0; 30 for (j=ai[i]; j<ai[i+1]; j++) { 31 if (aj[j] != i) future += PetscAbsScalar(aa[j]); else past = PetscAbsScalar(aa[j]); 32 } 33 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 34 if (past/future > best) { 35 best = past/future; 36 current = i; 37 } 38 } 39 40 ierr = PetscMalloc(n*sizeof(PetscBool),&done);CHKERRQ(ierr); 41 ierr = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr); 42 ierr = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr); 43 order[0] = current; 44 for (i=0; i<n-1; i++) { 45 done[current] = PETSC_TRUE; 46 best = -1; 47 /* loop over all neighbors of current pivot */ 48 for (j=ai[current]; j<ai[current+1]; j++) { 49 jj = aj[j]; 50 if (done[jj]) continue; 51 /* loop over columns of potential next row computing weights for below and above diagonal */ 52 past = future = 0.0; 53 for (k=ai[jj]; k<ai[jj+1]; k++) { 54 kk = aj[k]; 55 if (done[kk]) past += PetscAbsScalar(aa[k]); 56 else if (kk != jj) future += PetscAbsScalar(aa[k]); 57 } 58 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 59 if (past/future > best) { 60 best = past/future; 61 newcurrent = jj; 62 } 63 } 64 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 65 best = -1; 66 for (k=0; k<n; k++) { 67 if (done[k]) continue; 68 future = 0.0; 69 past = 0.0; 70 for (j=ai[k]; j<ai[k+1]; j++) { 71 kk = aj[j]; 72 if (done[kk]) past += PetscAbsScalar(aa[j]); 73 else if (kk != k) future += PetscAbsScalar(aa[j]); 74 } 75 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 76 if (past/future > best) { 77 best = past/future; 78 newcurrent = k; 79 } 80 } 81 } 82 if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current"); 83 current = newcurrent; 84 order[i+1] = current; 85 } 86 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr); 87 *icol = *irow; 88 ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr); 89 ierr = PetscFree(done);CHKERRQ(ierr); 90 ierr = PetscFree(order);CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 EXTERN_C_END 94 95 EXTERN_C_BEGIN 96 #undef __FUNCT__ 97 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 98 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool *flg) 99 { 100 PetscFunctionBegin; 101 *flg = PETSC_TRUE; 102 PetscFunctionReturn(0); 103 } 104 EXTERN_C_END 105 106 EXTERN_C_BEGIN 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 109 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 110 { 111 PetscInt n = A->rmap->n; 112 PetscErrorCode ierr; 113 114 PetscFunctionBegin; 115 #if defined(PETSC_USE_COMPLEX) 116 if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC))SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported"); 117 #endif 118 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 119 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 120 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){ 121 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 122 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 123 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 124 ierr = MatSetBlockSizes(*B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 125 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 126 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 127 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 128 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 129 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 130 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 131 (*B)->factortype = ftype; 132 PetscFunctionReturn(0); 133 } 134 EXTERN_C_END 135 136 #undef __FUNCT__ 137 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace" 138 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 139 { 140 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 141 IS isicol; 142 PetscErrorCode ierr; 143 const PetscInt *r,*ic; 144 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 145 PetscInt *bi,*bj,*ajtmp; 146 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 147 PetscReal f; 148 PetscInt nlnk,*lnk,k,**bi_ptr; 149 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 150 PetscBT lnkbt; 151 152 PetscFunctionBegin; 153 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 154 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 155 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 156 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 157 158 /* get new row pointers */ 159 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 160 bi[0] = 0; 161 162 /* bdiag is location of diagonal in factor */ 163 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 164 bdiag[0] = 0; 165 166 /* linked list for storing column indices of the active row */ 167 nlnk = n + 1; 168 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 169 170 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 171 172 /* initial FreeSpace size is f*(ai[n]+1) */ 173 f = info->fill; 174 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 175 current_space = free_space; 176 177 for (i=0; i<n; i++) { 178 /* copy previous fill into linked list */ 179 nzi = 0; 180 nnz = ai[r[i]+1] - ai[r[i]]; 181 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 182 ajtmp = aj + ai[r[i]]; 183 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 184 nzi += nlnk; 185 186 /* add pivot rows into linked list */ 187 row = lnk[n]; 188 while (row < i) { 189 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 190 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 191 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 192 nzi += nlnk; 193 row = lnk[row]; 194 } 195 bi[i+1] = bi[i] + nzi; 196 im[i] = nzi; 197 198 /* mark bdiag */ 199 nzbd = 0; 200 nnz = nzi; 201 k = lnk[n]; 202 while (nnz-- && k < i){ 203 nzbd++; 204 k = lnk[k]; 205 } 206 bdiag[i] = bi[i] + nzbd; 207 208 /* if free space is not available, make more free space */ 209 if (current_space->local_remaining<nzi) { 210 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 211 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 212 reallocs++; 213 } 214 215 /* copy data into free space, then initialize lnk */ 216 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 217 bi_ptr[i] = current_space->array; 218 current_space->array += nzi; 219 current_space->local_used += nzi; 220 current_space->local_remaining -= nzi; 221 } 222 #if defined(PETSC_USE_INFO) 223 if (ai[n] != 0) { 224 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 225 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 226 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 227 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 228 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 229 } else { 230 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 231 } 232 #endif 233 234 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 235 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 236 237 /* destroy list of free space and other temporary array(s) */ 238 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 239 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 240 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 241 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 242 243 /* put together the new matrix */ 244 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 245 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 246 b = (Mat_SeqAIJ*)(B)->data; 247 b->free_a = PETSC_TRUE; 248 b->free_ij = PETSC_TRUE; 249 b->singlemalloc = PETSC_FALSE; 250 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 251 b->j = bj; 252 b->i = bi; 253 b->diag = bdiag; 254 b->ilen = 0; 255 b->imax = 0; 256 b->row = isrow; 257 b->col = iscol; 258 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 259 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 260 b->icol = isicol; 261 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 262 263 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 264 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 265 b->maxnz = b->nz = bi[n] ; 266 267 (B)->factortype = MAT_FACTOR_LU; 268 (B)->info.factor_mallocs = reallocs; 269 (B)->info.fill_ratio_given = f; 270 271 if (ai[n]) { 272 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 273 } else { 274 (B)->info.fill_ratio_needed = 0.0; 275 } 276 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 277 if (a->inode.size) { 278 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 279 } 280 PetscFunctionReturn(0); 281 } 282 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 285 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 286 { 287 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 288 IS isicol; 289 PetscErrorCode ierr; 290 const PetscInt *r,*ic; 291 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 292 PetscInt *bi,*bj,*ajtmp; 293 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 294 PetscReal f; 295 PetscInt nlnk,*lnk,k,**bi_ptr; 296 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 297 PetscBT lnkbt; 298 299 PetscFunctionBegin; 300 /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */ 301 /* 302 PetscBool olddatastruct=PETSC_FALSE; 303 ierr = PetscOptionsGetBool(PETSC_NULL,"-lu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr); 304 if(olddatastruct){ 305 ierr = MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);CHKERRQ(ierr); 306 PetscFunctionReturn(0); 307 } 308 */ 309 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 310 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 311 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 312 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 313 314 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 315 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 316 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 317 bi[0] = bdiag[0] = 0; 318 319 /* linked list for storing column indices of the active row */ 320 nlnk = n + 1; 321 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 322 323 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 324 325 /* initial FreeSpace size is f*(ai[n]+1) */ 326 f = info->fill; 327 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 328 current_space = free_space; 329 330 for (i=0; i<n; i++) { 331 /* copy previous fill into linked list */ 332 nzi = 0; 333 nnz = ai[r[i]+1] - ai[r[i]]; 334 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 335 ajtmp = aj + ai[r[i]]; 336 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 337 nzi += nlnk; 338 339 /* add pivot rows into linked list */ 340 row = lnk[n]; 341 while (row < i){ 342 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 343 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 344 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 345 nzi += nlnk; 346 row = lnk[row]; 347 } 348 bi[i+1] = bi[i] + nzi; 349 im[i] = nzi; 350 351 /* mark bdiag */ 352 nzbd = 0; 353 nnz = nzi; 354 k = lnk[n]; 355 while (nnz-- && k < i){ 356 nzbd++; 357 k = lnk[k]; 358 } 359 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 360 361 /* if free space is not available, make more free space */ 362 if (current_space->local_remaining<nzi) { 363 nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */ 364 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 365 reallocs++; 366 } 367 368 /* copy data into free space, then initialize lnk */ 369 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 370 bi_ptr[i] = current_space->array; 371 current_space->array += nzi; 372 current_space->local_used += nzi; 373 current_space->local_remaining -= nzi; 374 } 375 376 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 377 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 378 379 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 380 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 381 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 382 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 383 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 384 385 /* put together the new matrix */ 386 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 387 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 388 b = (Mat_SeqAIJ*)(B)->data; 389 b->free_a = PETSC_TRUE; 390 b->free_ij = PETSC_TRUE; 391 b->singlemalloc = PETSC_FALSE; 392 ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 393 b->j = bj; 394 b->i = bi; 395 b->diag = bdiag; 396 b->ilen = 0; 397 b->imax = 0; 398 b->row = isrow; 399 b->col = iscol; 400 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 401 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 402 b->icol = isicol; 403 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 404 405 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 406 ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 407 b->maxnz = b->nz = bdiag[0]+1; 408 B->factortype = MAT_FACTOR_LU; 409 B->info.factor_mallocs = reallocs; 410 B->info.fill_ratio_given = f; 411 412 if (ai[n]) { 413 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 414 } else { 415 B->info.fill_ratio_needed = 0.0; 416 } 417 #if defined(PETSC_USE_INFO) 418 if (ai[n] != 0) { 419 PetscReal af = B->info.fill_ratio_needed; 420 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 421 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 422 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 423 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 424 } else { 425 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 426 } 427 #endif 428 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 429 if (a->inode.size) { 430 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 431 } 432 PetscFunctionReturn(0); 433 } 434 435 /* 436 Trouble in factorization, should we dump the original matrix? 437 */ 438 #undef __FUNCT__ 439 #define __FUNCT__ "MatFactorDumpMatrix" 440 PetscErrorCode MatFactorDumpMatrix(Mat A) 441 { 442 PetscErrorCode ierr; 443 PetscBool flg = PETSC_FALSE; 444 445 PetscFunctionBegin; 446 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr); 447 if (flg) { 448 PetscViewer viewer; 449 char filename[PETSC_MAX_PATH_LEN]; 450 451 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 452 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 453 ierr = MatView(A,viewer);CHKERRQ(ierr); 454 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 455 } 456 PetscFunctionReturn(0); 457 } 458 459 #undef __FUNCT__ 460 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 461 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 462 { 463 Mat C=B; 464 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 465 IS isrow = b->row,isicol = b->icol; 466 PetscErrorCode ierr; 467 const PetscInt *r,*ic,*ics; 468 const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag; 469 PetscInt i,j,k,nz,nzL,row,*pj; 470 const PetscInt *ajtmp,*bjtmp; 471 MatScalar *rtmp,*pc,multiplier,*pv; 472 const MatScalar *aa=a->a,*v; 473 PetscBool row_identity,col_identity; 474 FactorShiftCtx sctx; 475 const PetscInt *ddiag; 476 PetscReal rs; 477 MatScalar d; 478 479 PetscFunctionBegin; 480 /* MatPivotSetUp(): initialize shift context sctx */ 481 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 482 483 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 484 ddiag = a->diag; 485 sctx.shift_top = info->zeropivot; 486 for (i=0; i<n; i++) { 487 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 488 d = (aa)[ddiag[i]]; 489 rs = -PetscAbsScalar(d) - PetscRealPart(d); 490 v = aa+ai[i]; 491 nz = ai[i+1] - ai[i]; 492 for (j=0; j<nz; j++) 493 rs += PetscAbsScalar(v[j]); 494 if (rs>sctx.shift_top) sctx.shift_top = rs; 495 } 496 sctx.shift_top *= 1.1; 497 sctx.nshift_max = 5; 498 sctx.shift_lo = 0.; 499 sctx.shift_hi = 1.; 500 } 501 502 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 503 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 504 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 505 ics = ic; 506 507 do { 508 sctx.newshift = PETSC_FALSE; 509 for (i=0; i<n; i++){ 510 /* zero rtmp */ 511 /* L part */ 512 nz = bi[i+1] - bi[i]; 513 bjtmp = bj + bi[i]; 514 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 515 516 /* U part */ 517 nz = bdiag[i]-bdiag[i+1]; 518 bjtmp = bj + bdiag[i+1]+1; 519 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 520 521 /* load in initial (unfactored row) */ 522 nz = ai[r[i]+1] - ai[r[i]]; 523 ajtmp = aj + ai[r[i]]; 524 v = aa + ai[r[i]]; 525 for (j=0; j<nz; j++) { 526 rtmp[ics[ajtmp[j]]] = v[j]; 527 } 528 /* ZeropivotApply() */ 529 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 530 531 /* elimination */ 532 bjtmp = bj + bi[i]; 533 row = *bjtmp++; 534 nzL = bi[i+1] - bi[i]; 535 for(k=0; k < nzL;k++) { 536 pc = rtmp + row; 537 if (*pc != 0.0) { 538 pv = b->a + bdiag[row]; 539 multiplier = *pc * (*pv); 540 *pc = multiplier; 541 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 542 pv = b->a + bdiag[row+1]+1; 543 nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */ 544 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 545 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 546 } 547 row = *bjtmp++; 548 } 549 550 /* finished row so stick it into b->a */ 551 rs = 0.0; 552 /* L part */ 553 pv = b->a + bi[i] ; 554 pj = b->j + bi[i] ; 555 nz = bi[i+1] - bi[i]; 556 for (j=0; j<nz; j++) { 557 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 558 } 559 560 /* U part */ 561 pv = b->a + bdiag[i+1]+1; 562 pj = b->j + bdiag[i+1]+1; 563 nz = bdiag[i] - bdiag[i+1]-1; 564 for (j=0; j<nz; j++) { 565 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 566 } 567 568 sctx.rs = rs; 569 sctx.pv = rtmp[i]; 570 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 571 if(sctx.newshift) break; /* break for-loop */ 572 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 573 574 /* Mark diagonal and invert diagonal for simplier triangular solves */ 575 pv = b->a + bdiag[i]; 576 *pv = 1.0/rtmp[i]; 577 578 } /* endof for (i=0; i<n; i++){ */ 579 580 /* MatPivotRefine() */ 581 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max){ 582 /* 583 * if no shift in this attempt & shifting & started shifting & can refine, 584 * then try lower shift 585 */ 586 sctx.shift_hi = sctx.shift_fraction; 587 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 588 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 589 sctx.newshift = PETSC_TRUE; 590 sctx.nshift++; 591 } 592 } while (sctx.newshift); 593 594 ierr = PetscFree(rtmp);CHKERRQ(ierr); 595 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 596 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 597 598 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 599 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 600 if (row_identity && col_identity) { 601 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 602 } else { 603 C->ops->solve = MatSolve_SeqAIJ; 604 } 605 C->ops->solveadd = MatSolveAdd_SeqAIJ; 606 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 607 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 608 C->ops->matsolve = MatMatSolve_SeqAIJ; 609 C->assembled = PETSC_TRUE; 610 C->preallocated = PETSC_TRUE; 611 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 612 613 /* MatShiftView(A,info,&sctx) */ 614 if (sctx.nshift){ 615 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 616 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 617 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 618 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 619 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){ 620 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr); 621 } 622 } 623 ierr = Mat_CheckInode_FactorLU(C,PETSC_FALSE);CHKERRQ(ierr); 624 PetscFunctionReturn(0); 625 } 626 627 #undef __FUNCT__ 628 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace" 629 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 630 { 631 Mat C=B; 632 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 633 IS isrow = b->row,isicol = b->icol; 634 PetscErrorCode ierr; 635 const PetscInt *r,*ic,*ics; 636 PetscInt nz,row,i,j,n=A->rmap->n,diag; 637 const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 638 const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj; 639 MatScalar *pv,*rtmp,*pc,multiplier,d; 640 const MatScalar *v,*aa=a->a; 641 PetscReal rs=0.0; 642 FactorShiftCtx sctx; 643 const PetscInt *ddiag; 644 PetscBool row_identity, col_identity; 645 646 PetscFunctionBegin; 647 /* MatPivotSetUp(): initialize shift context sctx */ 648 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 649 650 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 651 ddiag = a->diag; 652 sctx.shift_top = info->zeropivot; 653 for (i=0; i<n; i++) { 654 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 655 d = (aa)[ddiag[i]]; 656 rs = -PetscAbsScalar(d) - PetscRealPart(d); 657 v = aa+ai[i]; 658 nz = ai[i+1] - ai[i]; 659 for (j=0; j<nz; j++) 660 rs += PetscAbsScalar(v[j]); 661 if (rs>sctx.shift_top) sctx.shift_top = rs; 662 } 663 sctx.shift_top *= 1.1; 664 sctx.nshift_max = 5; 665 sctx.shift_lo = 0.; 666 sctx.shift_hi = 1.; 667 } 668 669 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 670 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 671 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 672 ics = ic; 673 674 do { 675 sctx.newshift = PETSC_FALSE; 676 for (i=0; i<n; i++){ 677 nz = bi[i+1] - bi[i]; 678 bjtmp = bj + bi[i]; 679 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 680 681 /* load in initial (unfactored row) */ 682 nz = ai[r[i]+1] - ai[r[i]]; 683 ajtmp = aj + ai[r[i]]; 684 v = aa + ai[r[i]]; 685 for (j=0; j<nz; j++) { 686 rtmp[ics[ajtmp[j]]] = v[j]; 687 } 688 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 689 690 row = *bjtmp++; 691 while (row < i) { 692 pc = rtmp + row; 693 if (*pc != 0.0) { 694 pv = b->a + diag_offset[row]; 695 pj = b->j + diag_offset[row] + 1; 696 multiplier = *pc / *pv++; 697 *pc = multiplier; 698 nz = bi[row+1] - diag_offset[row] - 1; 699 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 700 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 701 } 702 row = *bjtmp++; 703 } 704 /* finished row so stick it into b->a */ 705 pv = b->a + bi[i] ; 706 pj = b->j + bi[i] ; 707 nz = bi[i+1] - bi[i]; 708 diag = diag_offset[i] - bi[i]; 709 rs = 0.0; 710 for (j=0; j<nz; j++) { 711 pv[j] = rtmp[pj[j]]; 712 rs += PetscAbsScalar(pv[j]); 713 } 714 rs -= PetscAbsScalar(pv[diag]); 715 716 sctx.rs = rs; 717 sctx.pv = pv[diag]; 718 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 719 if (sctx.newshift) break; 720 pv[diag] = sctx.pv; 721 } 722 723 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 724 /* 725 * if no shift in this attempt & shifting & started shifting & can refine, 726 * then try lower shift 727 */ 728 sctx.shift_hi = sctx.shift_fraction; 729 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 730 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 731 sctx.newshift = PETSC_TRUE; 732 sctx.nshift++; 733 } 734 } while (sctx.newshift); 735 736 /* invert diagonal entries for simplier triangular solves */ 737 for (i=0; i<n; i++) { 738 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 739 } 740 ierr = PetscFree(rtmp);CHKERRQ(ierr); 741 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 742 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 743 744 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 745 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 746 if (row_identity && col_identity) { 747 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 748 } else { 749 C->ops->solve = MatSolve_SeqAIJ_inplace; 750 } 751 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 752 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 753 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 754 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 755 C->assembled = PETSC_TRUE; 756 C->preallocated = PETSC_TRUE; 757 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 758 if (sctx.nshift){ 759 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 760 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 761 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 762 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 763 } 764 } 765 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 766 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 767 ierr = Mat_CheckInode(C,PETSC_FALSE);CHKERRQ(ierr); 768 PetscFunctionReturn(0); 769 } 770 771 /* 772 This routine implements inplace ILU(0) with row or/and column permutations. 773 Input: 774 A - original matrix 775 Output; 776 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 777 a->j (col index) is permuted by the inverse of colperm, then sorted 778 a->a reordered accordingly with a->j 779 a->diag (ptr to diagonal elements) is updated. 780 */ 781 #undef __FUNCT__ 782 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 783 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 784 { 785 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 786 IS isrow = a->row,isicol = a->icol; 787 PetscErrorCode ierr; 788 const PetscInt *r,*ic,*ics; 789 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 790 PetscInt *ajtmp,nz,row; 791 PetscInt *diag = a->diag,nbdiag,*pj; 792 PetscScalar *rtmp,*pc,multiplier,d; 793 MatScalar *pv,*v; 794 PetscReal rs; 795 FactorShiftCtx sctx; 796 const MatScalar *aa=a->a,*vtmp; 797 798 PetscFunctionBegin; 799 if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 800 801 /* MatPivotSetUp(): initialize shift context sctx */ 802 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 803 804 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 805 const PetscInt *ddiag = a->diag; 806 sctx.shift_top = info->zeropivot; 807 for (i=0; i<n; i++) { 808 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 809 d = (aa)[ddiag[i]]; 810 rs = -PetscAbsScalar(d) - PetscRealPart(d); 811 vtmp = aa+ai[i]; 812 nz = ai[i+1] - ai[i]; 813 for (j=0; j<nz; j++) 814 rs += PetscAbsScalar(vtmp[j]); 815 if (rs>sctx.shift_top) sctx.shift_top = rs; 816 } 817 sctx.shift_top *= 1.1; 818 sctx.nshift_max = 5; 819 sctx.shift_lo = 0.; 820 sctx.shift_hi = 1.; 821 } 822 823 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 824 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 825 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 826 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 827 ics = ic; 828 829 #if defined(MV) 830 sctx.shift_top = 0.; 831 sctx.nshift_max = 0; 832 sctx.shift_lo = 0.; 833 sctx.shift_hi = 0.; 834 sctx.shift_fraction = 0.; 835 836 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 837 sctx.shift_top = 0.; 838 for (i=0; i<n; i++) { 839 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 840 d = (a->a)[diag[i]]; 841 rs = -PetscAbsScalar(d) - PetscRealPart(d); 842 v = a->a+ai[i]; 843 nz = ai[i+1] - ai[i]; 844 for (j=0; j<nz; j++) 845 rs += PetscAbsScalar(v[j]); 846 if (rs>sctx.shift_top) sctx.shift_top = rs; 847 } 848 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 849 sctx.shift_top *= 1.1; 850 sctx.nshift_max = 5; 851 sctx.shift_lo = 0.; 852 sctx.shift_hi = 1.; 853 } 854 855 sctx.shift_amount = 0.; 856 sctx.nshift = 0; 857 #endif 858 859 do { 860 sctx.newshift = PETSC_FALSE; 861 for (i=0; i<n; i++){ 862 /* load in initial unfactored row */ 863 nz = ai[r[i]+1] - ai[r[i]]; 864 ajtmp = aj + ai[r[i]]; 865 v = a->a + ai[r[i]]; 866 /* sort permuted ajtmp and values v accordingly */ 867 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 868 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 869 870 diag[r[i]] = ai[r[i]]; 871 for (j=0; j<nz; j++) { 872 rtmp[ajtmp[j]] = v[j]; 873 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 874 } 875 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 876 877 row = *ajtmp++; 878 while (row < i) { 879 pc = rtmp + row; 880 if (*pc != 0.0) { 881 pv = a->a + diag[r[row]]; 882 pj = aj + diag[r[row]] + 1; 883 884 multiplier = *pc / *pv++; 885 *pc = multiplier; 886 nz = ai[r[row]+1] - diag[r[row]] - 1; 887 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 888 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 889 } 890 row = *ajtmp++; 891 } 892 /* finished row so overwrite it onto a->a */ 893 pv = a->a + ai[r[i]] ; 894 pj = aj + ai[r[i]] ; 895 nz = ai[r[i]+1] - ai[r[i]]; 896 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 897 898 rs = 0.0; 899 for (j=0; j<nz; j++) { 900 pv[j] = rtmp[pj[j]]; 901 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 902 } 903 904 sctx.rs = rs; 905 sctx.pv = pv[nbdiag]; 906 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 907 if (sctx.newshift) break; 908 pv[nbdiag] = sctx.pv; 909 } 910 911 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 912 /* 913 * if no shift in this attempt & shifting & started shifting & can refine, 914 * then try lower shift 915 */ 916 sctx.shift_hi = sctx.shift_fraction; 917 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 918 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 919 sctx.newshift = PETSC_TRUE; 920 sctx.nshift++; 921 } 922 } while (sctx.newshift); 923 924 /* invert diagonal entries for simplier triangular solves */ 925 for (i=0; i<n; i++) { 926 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 927 } 928 929 ierr = PetscFree(rtmp);CHKERRQ(ierr); 930 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 931 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 932 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 933 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 934 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 935 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 936 A->assembled = PETSC_TRUE; 937 A->preallocated = PETSC_TRUE; 938 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 939 if (sctx.nshift){ 940 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 941 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 942 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 943 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 944 } 945 } 946 PetscFunctionReturn(0); 947 } 948 949 /* ----------------------------------------------------------- */ 950 #undef __FUNCT__ 951 #define __FUNCT__ "MatLUFactor_SeqAIJ" 952 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 953 { 954 PetscErrorCode ierr; 955 Mat C; 956 957 PetscFunctionBegin; 958 ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 959 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 960 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 961 A->ops->solve = C->ops->solve; 962 A->ops->solvetranspose = C->ops->solvetranspose; 963 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 964 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 965 PetscFunctionReturn(0); 966 } 967 /* ----------------------------------------------------------- */ 968 969 970 #undef __FUNCT__ 971 #define __FUNCT__ "MatSolve_SeqAIJ_inplace" 972 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 973 { 974 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 975 IS iscol = a->col,isrow = a->row; 976 PetscErrorCode ierr; 977 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 978 PetscInt nz; 979 const PetscInt *rout,*cout,*r,*c; 980 PetscScalar *x,*tmp,*tmps,sum; 981 const PetscScalar *b; 982 const MatScalar *aa = a->a,*v; 983 984 PetscFunctionBegin; 985 if (!n) PetscFunctionReturn(0); 986 987 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 988 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 989 tmp = a->solve_work; 990 991 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 992 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 993 994 /* forward solve the lower triangular */ 995 tmp[0] = b[*r++]; 996 tmps = tmp; 997 for (i=1; i<n; i++) { 998 v = aa + ai[i] ; 999 vi = aj + ai[i] ; 1000 nz = a->diag[i] - ai[i]; 1001 sum = b[*r++]; 1002 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1003 tmp[i] = sum; 1004 } 1005 1006 /* backward solve the upper triangular */ 1007 for (i=n-1; i>=0; i--){ 1008 v = aa + a->diag[i] + 1; 1009 vi = aj + a->diag[i] + 1; 1010 nz = ai[i+1] - a->diag[i] - 1; 1011 sum = tmp[i]; 1012 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1013 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 1014 } 1015 1016 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1017 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1018 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1019 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1020 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1021 PetscFunctionReturn(0); 1022 } 1023 1024 #undef __FUNCT__ 1025 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace" 1026 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X) 1027 { 1028 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1029 IS iscol = a->col,isrow = a->row; 1030 PetscErrorCode ierr; 1031 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1032 PetscInt nz,neq; 1033 const PetscInt *rout,*cout,*r,*c; 1034 PetscScalar *x,*b,*tmp,*tmps,sum; 1035 const MatScalar *aa = a->a,*v; 1036 PetscBool bisdense,xisdense; 1037 1038 PetscFunctionBegin; 1039 if (!n) PetscFunctionReturn(0); 1040 1041 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1042 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1043 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1044 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1045 1046 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 1047 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1048 1049 tmp = a->solve_work; 1050 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1051 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1052 1053 for (neq=0; neq<B->cmap->n; neq++){ 1054 /* forward solve the lower triangular */ 1055 tmp[0] = b[r[0]]; 1056 tmps = tmp; 1057 for (i=1; i<n; i++) { 1058 v = aa + ai[i] ; 1059 vi = aj + ai[i] ; 1060 nz = a->diag[i] - ai[i]; 1061 sum = b[r[i]]; 1062 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1063 tmp[i] = sum; 1064 } 1065 /* backward solve the upper triangular */ 1066 for (i=n-1; i>=0; i--){ 1067 v = aa + a->diag[i] + 1; 1068 vi = aj + a->diag[i] + 1; 1069 nz = ai[i+1] - a->diag[i] - 1; 1070 sum = tmp[i]; 1071 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1072 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 1073 } 1074 1075 b += n; 1076 x += n; 1077 } 1078 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1079 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1080 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 1081 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1082 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1083 PetscFunctionReturn(0); 1084 } 1085 1086 #undef __FUNCT__ 1087 #define __FUNCT__ "MatMatSolve_SeqAIJ" 1088 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 1089 { 1090 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1091 IS iscol = a->col,isrow = a->row; 1092 PetscErrorCode ierr; 1093 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1094 PetscInt nz,neq; 1095 const PetscInt *rout,*cout,*r,*c; 1096 PetscScalar *x,*b,*tmp,sum; 1097 const MatScalar *aa = a->a,*v; 1098 PetscBool bisdense,xisdense; 1099 1100 PetscFunctionBegin; 1101 if (!n) PetscFunctionReturn(0); 1102 1103 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1104 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1105 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1106 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1107 1108 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 1109 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1110 1111 tmp = a->solve_work; 1112 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1113 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1114 1115 for (neq=0; neq<B->cmap->n; neq++){ 1116 /* forward solve the lower triangular */ 1117 tmp[0] = b[r[0]]; 1118 v = aa; 1119 vi = aj; 1120 for (i=1; i<n; i++) { 1121 nz = ai[i+1] - ai[i]; 1122 sum = b[r[i]]; 1123 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1124 tmp[i] = sum; 1125 v += nz; vi += nz; 1126 } 1127 1128 /* backward solve the upper triangular */ 1129 for (i=n-1; i>=0; i--){ 1130 v = aa + adiag[i+1]+1; 1131 vi = aj + adiag[i+1]+1; 1132 nz = adiag[i]-adiag[i+1]-1; 1133 sum = tmp[i]; 1134 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1135 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 1136 } 1137 1138 b += n; 1139 x += n; 1140 } 1141 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1142 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1143 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 1144 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1145 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1146 PetscFunctionReturn(0); 1147 } 1148 1149 #undef __FUNCT__ 1150 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 1151 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 1152 { 1153 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1154 IS iscol = a->col,isrow = a->row; 1155 PetscErrorCode ierr; 1156 const PetscInt *r,*c,*rout,*cout; 1157 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1158 PetscInt nz,row; 1159 PetscScalar *x,*b,*tmp,*tmps,sum; 1160 const MatScalar *aa = a->a,*v; 1161 1162 PetscFunctionBegin; 1163 if (!n) PetscFunctionReturn(0); 1164 1165 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1166 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1167 tmp = a->solve_work; 1168 1169 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1170 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1171 1172 /* forward solve the lower triangular */ 1173 tmp[0] = b[*r++]; 1174 tmps = tmp; 1175 for (row=1; row<n; row++) { 1176 i = rout[row]; /* permuted row */ 1177 v = aa + ai[i] ; 1178 vi = aj + ai[i] ; 1179 nz = a->diag[i] - ai[i]; 1180 sum = b[*r++]; 1181 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1182 tmp[row] = sum; 1183 } 1184 1185 /* backward solve the upper triangular */ 1186 for (row=n-1; row>=0; row--){ 1187 i = rout[row]; /* permuted row */ 1188 v = aa + a->diag[i] + 1; 1189 vi = aj + a->diag[i] + 1; 1190 nz = ai[i+1] - a->diag[i] - 1; 1191 sum = tmp[row]; 1192 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1193 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 1194 } 1195 1196 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1197 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1198 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1199 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1200 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1201 PetscFunctionReturn(0); 1202 } 1203 1204 /* ----------------------------------------------------------- */ 1205 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1206 #undef __FUNCT__ 1207 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace" 1208 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1209 { 1210 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1211 PetscErrorCode ierr; 1212 PetscInt n = A->rmap->n; 1213 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag; 1214 PetscScalar *x; 1215 const PetscScalar *b; 1216 const MatScalar *aa = a->a; 1217 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1218 PetscInt adiag_i,i,nz,ai_i; 1219 const PetscInt *vi; 1220 const MatScalar *v; 1221 PetscScalar sum; 1222 #endif 1223 1224 PetscFunctionBegin; 1225 if (!n) PetscFunctionReturn(0); 1226 1227 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1228 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1229 1230 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1231 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 1232 #else 1233 /* forward solve the lower triangular */ 1234 x[0] = b[0]; 1235 for (i=1; i<n; i++) { 1236 ai_i = ai[i]; 1237 v = aa + ai_i; 1238 vi = aj + ai_i; 1239 nz = adiag[i] - ai_i; 1240 sum = b[i]; 1241 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1242 x[i] = sum; 1243 } 1244 1245 /* backward solve the upper triangular */ 1246 for (i=n-1; i>=0; i--){ 1247 adiag_i = adiag[i]; 1248 v = aa + adiag_i + 1; 1249 vi = aj + adiag_i + 1; 1250 nz = ai[i+1] - adiag_i - 1; 1251 sum = x[i]; 1252 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1253 x[i] = sum*aa[adiag_i]; 1254 } 1255 #endif 1256 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1257 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1258 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace" 1264 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx) 1265 { 1266 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1267 IS iscol = a->col,isrow = a->row; 1268 PetscErrorCode ierr; 1269 PetscInt i, n = A->rmap->n,j; 1270 PetscInt nz; 1271 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j; 1272 PetscScalar *x,*tmp,sum; 1273 const PetscScalar *b; 1274 const MatScalar *aa = a->a,*v; 1275 1276 PetscFunctionBegin; 1277 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1278 1279 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1280 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1281 tmp = a->solve_work; 1282 1283 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1284 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1285 1286 /* forward solve the lower triangular */ 1287 tmp[0] = b[*r++]; 1288 for (i=1; i<n; i++) { 1289 v = aa + ai[i] ; 1290 vi = aj + ai[i] ; 1291 nz = a->diag[i] - ai[i]; 1292 sum = b[*r++]; 1293 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1294 tmp[i] = sum; 1295 } 1296 1297 /* backward solve the upper triangular */ 1298 for (i=n-1; i>=0; i--){ 1299 v = aa + a->diag[i] + 1; 1300 vi = aj + a->diag[i] + 1; 1301 nz = ai[i+1] - a->diag[i] - 1; 1302 sum = tmp[i]; 1303 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1304 tmp[i] = sum*aa[a->diag[i]]; 1305 x[*c--] += tmp[i]; 1306 } 1307 1308 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1309 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1310 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1311 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1312 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1313 1314 PetscFunctionReturn(0); 1315 } 1316 1317 #undef __FUNCT__ 1318 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1319 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1320 { 1321 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1322 IS iscol = a->col,isrow = a->row; 1323 PetscErrorCode ierr; 1324 PetscInt i, n = A->rmap->n,j; 1325 PetscInt nz; 1326 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1327 PetscScalar *x,*tmp,sum; 1328 const PetscScalar *b; 1329 const MatScalar *aa = a->a,*v; 1330 1331 PetscFunctionBegin; 1332 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1333 1334 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1335 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1336 tmp = a->solve_work; 1337 1338 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1339 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1340 1341 /* forward solve the lower triangular */ 1342 tmp[0] = b[r[0]]; 1343 v = aa; 1344 vi = aj; 1345 for (i=1; i<n; i++) { 1346 nz = ai[i+1] - ai[i]; 1347 sum = b[r[i]]; 1348 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1349 tmp[i] = sum; 1350 v += nz; vi += nz; 1351 } 1352 1353 /* backward solve the upper triangular */ 1354 v = aa + adiag[n-1]; 1355 vi = aj + adiag[n-1]; 1356 for (i=n-1; i>=0; i--){ 1357 nz = adiag[i] - adiag[i+1] - 1; 1358 sum = tmp[i]; 1359 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1360 tmp[i] = sum*v[nz]; 1361 x[c[i]] += tmp[i]; 1362 v += nz+1; vi += nz+1; 1363 } 1364 1365 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1366 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1367 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1368 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1369 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1370 1371 PetscFunctionReturn(0); 1372 } 1373 1374 #undef __FUNCT__ 1375 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace" 1376 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 1377 { 1378 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1379 IS iscol = a->col,isrow = a->row; 1380 PetscErrorCode ierr; 1381 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1382 PetscInt i,n = A->rmap->n,j; 1383 PetscInt nz; 1384 PetscScalar *x,*tmp,s1; 1385 const MatScalar *aa = a->a,*v; 1386 const PetscScalar *b; 1387 1388 PetscFunctionBegin; 1389 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1390 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1391 tmp = a->solve_work; 1392 1393 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1394 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1395 1396 /* copy the b into temp work space according to permutation */ 1397 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1398 1399 /* forward solve the U^T */ 1400 for (i=0; i<n; i++) { 1401 v = aa + diag[i] ; 1402 vi = aj + diag[i] + 1; 1403 nz = ai[i+1] - diag[i] - 1; 1404 s1 = tmp[i]; 1405 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1406 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1407 tmp[i] = s1; 1408 } 1409 1410 /* backward solve the L^T */ 1411 for (i=n-1; i>=0; i--){ 1412 v = aa + diag[i] - 1 ; 1413 vi = aj + diag[i] - 1 ; 1414 nz = diag[i] - ai[i]; 1415 s1 = tmp[i]; 1416 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1417 } 1418 1419 /* copy tmp into x according to permutation */ 1420 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1421 1422 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1423 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1424 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1425 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1426 1427 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1428 PetscFunctionReturn(0); 1429 } 1430 1431 #undef __FUNCT__ 1432 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1433 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1434 { 1435 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1436 IS iscol = a->col,isrow = a->row; 1437 PetscErrorCode ierr; 1438 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1439 PetscInt i,n = A->rmap->n,j; 1440 PetscInt nz; 1441 PetscScalar *x,*tmp,s1; 1442 const MatScalar *aa = a->a,*v; 1443 const PetscScalar *b; 1444 1445 PetscFunctionBegin; 1446 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1447 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1448 tmp = a->solve_work; 1449 1450 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1451 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1452 1453 /* copy the b into temp work space according to permutation */ 1454 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1455 1456 /* forward solve the U^T */ 1457 for (i=0; i<n; i++) { 1458 v = aa + adiag[i+1] + 1; 1459 vi = aj + adiag[i+1] + 1; 1460 nz = adiag[i] - adiag[i+1] - 1; 1461 s1 = tmp[i]; 1462 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1463 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1464 tmp[i] = s1; 1465 } 1466 1467 /* backward solve the L^T */ 1468 for (i=n-1; i>=0; i--){ 1469 v = aa + ai[i]; 1470 vi = aj + ai[i]; 1471 nz = ai[i+1] - ai[i]; 1472 s1 = tmp[i]; 1473 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1474 } 1475 1476 /* copy tmp into x according to permutation */ 1477 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1478 1479 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1480 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1481 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1482 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1483 1484 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1485 PetscFunctionReturn(0); 1486 } 1487 1488 #undef __FUNCT__ 1489 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace" 1490 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx) 1491 { 1492 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1493 IS iscol = a->col,isrow = a->row; 1494 PetscErrorCode ierr; 1495 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1496 PetscInt i,n = A->rmap->n,j; 1497 PetscInt nz; 1498 PetscScalar *x,*tmp,s1; 1499 const MatScalar *aa = a->a,*v; 1500 const PetscScalar *b; 1501 1502 PetscFunctionBegin; 1503 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1504 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1505 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1506 tmp = a->solve_work; 1507 1508 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1509 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1510 1511 /* copy the b into temp work space according to permutation */ 1512 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1513 1514 /* forward solve the U^T */ 1515 for (i=0; i<n; i++) { 1516 v = aa + diag[i] ; 1517 vi = aj + diag[i] + 1; 1518 nz = ai[i+1] - diag[i] - 1; 1519 s1 = tmp[i]; 1520 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1521 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1522 tmp[i] = s1; 1523 } 1524 1525 /* backward solve the L^T */ 1526 for (i=n-1; i>=0; i--){ 1527 v = aa + diag[i] - 1 ; 1528 vi = aj + diag[i] - 1 ; 1529 nz = diag[i] - ai[i]; 1530 s1 = tmp[i]; 1531 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1532 } 1533 1534 /* copy tmp into x according to permutation */ 1535 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1536 1537 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1538 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1539 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1540 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1541 1542 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1543 PetscFunctionReturn(0); 1544 } 1545 1546 #undef __FUNCT__ 1547 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1548 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1549 { 1550 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1551 IS iscol = a->col,isrow = a->row; 1552 PetscErrorCode ierr; 1553 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1554 PetscInt i,n = A->rmap->n,j; 1555 PetscInt nz; 1556 PetscScalar *x,*tmp,s1; 1557 const MatScalar *aa = a->a,*v; 1558 const PetscScalar *b; 1559 1560 PetscFunctionBegin; 1561 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1562 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1563 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1564 tmp = a->solve_work; 1565 1566 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1567 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1568 1569 /* copy the b into temp work space according to permutation */ 1570 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1571 1572 /* forward solve the U^T */ 1573 for (i=0; i<n; i++) { 1574 v = aa + adiag[i+1] + 1; 1575 vi = aj + adiag[i+1] + 1; 1576 nz = adiag[i] - adiag[i+1] - 1; 1577 s1 = tmp[i]; 1578 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1579 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1580 tmp[i] = s1; 1581 } 1582 1583 1584 /* backward solve the L^T */ 1585 for (i=n-1; i>=0; i--){ 1586 v = aa + ai[i] ; 1587 vi = aj + ai[i]; 1588 nz = ai[i+1] - ai[i]; 1589 s1 = tmp[i]; 1590 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1591 } 1592 1593 /* copy tmp into x according to permutation */ 1594 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1595 1596 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1597 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1598 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1599 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1600 1601 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1602 PetscFunctionReturn(0); 1603 } 1604 1605 /* ----------------------------------------------------------------*/ 1606 1607 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool ); 1608 1609 /* 1610 ilu() under revised new data structure. 1611 Factored arrays bj and ba are stored as 1612 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1613 1614 bi=fact->i is an array of size n+1, in which 1615 bi+ 1616 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1617 bi[n]: points to L(n-1,n-1)+1 1618 1619 bdiag=fact->diag is an array of size n+1,in which 1620 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1621 bdiag[n]: points to entry of U(n-1,0)-1 1622 1623 U(i,:) contains bdiag[i] as its last entry, i.e., 1624 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1625 */ 1626 #undef __FUNCT__ 1627 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0" 1628 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1629 { 1630 1631 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1632 PetscErrorCode ierr; 1633 const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1634 PetscInt i,j,k=0,nz,*bi,*bj,*bdiag; 1635 PetscBool missing; 1636 IS isicol; 1637 1638 PetscFunctionBegin; 1639 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1640 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1641 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1642 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1643 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 1644 b = (Mat_SeqAIJ*)(fact)->data; 1645 1646 /* allocate matrix arrays for new data structure */ 1647 ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);CHKERRQ(ierr); 1648 ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1649 b->singlemalloc = PETSC_TRUE; 1650 if (!b->diag){ 1651 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr); 1652 ierr = PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1653 } 1654 bdiag = b->diag; 1655 1656 if (n > 0) { 1657 ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr); 1658 } 1659 1660 /* set bi and bj with new data structure */ 1661 bi = b->i; 1662 bj = b->j; 1663 1664 /* L part */ 1665 bi[0] = 0; 1666 for (i=0; i<n; i++){ 1667 nz = adiag[i] - ai[i]; 1668 bi[i+1] = bi[i] + nz; 1669 aj = a->j + ai[i]; 1670 for (j=0; j<nz; j++){ 1671 /* *bj = aj[j]; bj++; */ 1672 bj[k++] = aj[j]; 1673 } 1674 } 1675 1676 /* U part */ 1677 bdiag[n] = bi[n]-1; 1678 for (i=n-1; i>=0; i--){ 1679 nz = ai[i+1] - adiag[i] - 1; 1680 aj = a->j + adiag[i] + 1; 1681 for (j=0; j<nz; j++){ 1682 /* *bj = aj[j]; bj++; */ 1683 bj[k++] = aj[j]; 1684 } 1685 /* diag[i] */ 1686 /* *bj = i; bj++; */ 1687 bj[k++] = i; 1688 bdiag[i] = bdiag[i+1] + nz + 1; 1689 } 1690 1691 fact->factortype = MAT_FACTOR_ILU; 1692 fact->info.factor_mallocs = 0; 1693 fact->info.fill_ratio_given = info->fill; 1694 fact->info.fill_ratio_needed = 1.0; 1695 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1696 1697 b = (Mat_SeqAIJ*)(fact)->data; 1698 b->row = isrow; 1699 b->col = iscol; 1700 b->icol = isicol; 1701 ierr = PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1702 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1703 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1704 PetscFunctionReturn(0); 1705 } 1706 1707 #undef __FUNCT__ 1708 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1709 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1710 { 1711 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1712 IS isicol; 1713 PetscErrorCode ierr; 1714 const PetscInt *r,*ic; 1715 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1716 PetscInt *bi,*cols,nnz,*cols_lvl; 1717 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1718 PetscInt i,levels,diagonal_fill; 1719 PetscBool col_identity,row_identity; 1720 PetscReal f; 1721 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1722 PetscBT lnkbt; 1723 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1724 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1725 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1726 1727 PetscFunctionBegin; 1728 /* Uncomment the old data struct part only while testing new data structure for MatSolve() */ 1729 /* 1730 PetscBool olddatastruct=PETSC_FALSE; 1731 ierr = PetscOptionsGetBool(PETSC_NULL,"-ilu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr); 1732 if(olddatastruct){ 1733 ierr = MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1734 PetscFunctionReturn(0); 1735 } 1736 */ 1737 1738 levels = (PetscInt)info->levels; 1739 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1740 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1741 if (!levels && row_identity && col_identity) { 1742 /* special case: ilu(0) with natural ordering */ 1743 ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1744 if (a->inode.size) { 1745 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1746 } 1747 PetscFunctionReturn(0); 1748 } 1749 1750 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1751 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1752 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1753 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1754 1755 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1756 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1757 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1758 bi[0] = bdiag[0] = 0; 1759 ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr); 1760 1761 /* create a linked list for storing column indices of the active row */ 1762 nlnk = n + 1; 1763 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1764 1765 /* initial FreeSpace size is f*(ai[n]+1) */ 1766 f = info->fill; 1767 diagonal_fill = (PetscInt)info->diagonal_fill; 1768 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1769 current_space = free_space; 1770 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1771 current_space_lvl = free_space_lvl; 1772 for (i=0; i<n; i++) { 1773 nzi = 0; 1774 /* copy current row into linked list */ 1775 nnz = ai[r[i]+1] - ai[r[i]]; 1776 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1777 cols = aj + ai[r[i]]; 1778 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1779 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1780 nzi += nlnk; 1781 1782 /* make sure diagonal entry is included */ 1783 if (diagonal_fill && lnk[i] == -1) { 1784 fm = n; 1785 while (lnk[fm] < i) fm = lnk[fm]; 1786 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1787 lnk[fm] = i; 1788 lnk_lvl[i] = 0; 1789 nzi++; dcount++; 1790 } 1791 1792 /* add pivot rows into the active row */ 1793 nzbd = 0; 1794 prow = lnk[n]; 1795 while (prow < i) { 1796 nnz = bdiag[prow]; 1797 cols = bj_ptr[prow] + nnz + 1; 1798 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1799 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1800 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1801 nzi += nlnk; 1802 prow = lnk[prow]; 1803 nzbd++; 1804 } 1805 bdiag[i] = nzbd; 1806 bi[i+1] = bi[i] + nzi; 1807 /* if free space is not available, make more free space */ 1808 if (current_space->local_remaining<nzi) { 1809 nnz = 2*nzi*(n - i); /* estimated and max additional space needed */ 1810 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1811 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1812 reallocs++; 1813 } 1814 1815 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1816 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1817 bj_ptr[i] = current_space->array; 1818 bjlvl_ptr[i] = current_space_lvl->array; 1819 1820 /* make sure the active row i has diagonal entry */ 1821 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1822 1823 current_space->array += nzi; 1824 current_space->local_used += nzi; 1825 current_space->local_remaining -= nzi; 1826 current_space_lvl->array += nzi; 1827 current_space_lvl->local_used += nzi; 1828 current_space_lvl->local_remaining -= nzi; 1829 } 1830 1831 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1832 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1833 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1834 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1835 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 1836 1837 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1838 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1839 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 1840 1841 #if defined(PETSC_USE_INFO) 1842 { 1843 PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1844 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1845 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1846 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1847 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1848 if (diagonal_fill) { 1849 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1850 } 1851 } 1852 #endif 1853 /* put together the new matrix */ 1854 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1855 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 1856 b = (Mat_SeqAIJ*)(fact)->data; 1857 b->free_a = PETSC_TRUE; 1858 b->free_ij = PETSC_TRUE; 1859 b->singlemalloc = PETSC_FALSE; 1860 ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 1861 b->j = bj; 1862 b->i = bi; 1863 b->diag = bdiag; 1864 b->ilen = 0; 1865 b->imax = 0; 1866 b->row = isrow; 1867 b->col = iscol; 1868 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1869 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1870 b->icol = isicol; 1871 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1872 /* In b structure: Free imax, ilen, old a, old j. 1873 Allocate bdiag, solve_work, new a, new j */ 1874 ierr = PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1875 b->maxnz = b->nz = bdiag[0]+1; 1876 (fact)->info.factor_mallocs = reallocs; 1877 (fact)->info.fill_ratio_given = f; 1878 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1879 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1880 if (a->inode.size) { 1881 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1882 } 1883 PetscFunctionReturn(0); 1884 } 1885 1886 #undef __FUNCT__ 1887 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace" 1888 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1889 { 1890 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1891 IS isicol; 1892 PetscErrorCode ierr; 1893 const PetscInt *r,*ic; 1894 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1895 PetscInt *bi,*cols,nnz,*cols_lvl; 1896 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1897 PetscInt i,levels,diagonal_fill; 1898 PetscBool col_identity,row_identity; 1899 PetscReal f; 1900 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1901 PetscBT lnkbt; 1902 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1903 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1904 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1905 PetscBool missing; 1906 1907 PetscFunctionBegin; 1908 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1909 f = info->fill; 1910 levels = (PetscInt)info->levels; 1911 diagonal_fill = (PetscInt)info->diagonal_fill; 1912 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1913 1914 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1915 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1916 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1917 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 1918 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1919 if (a->inode.size) { 1920 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1921 } 1922 fact->factortype = MAT_FACTOR_ILU; 1923 (fact)->info.factor_mallocs = 0; 1924 (fact)->info.fill_ratio_given = info->fill; 1925 (fact)->info.fill_ratio_needed = 1.0; 1926 b = (Mat_SeqAIJ*)(fact)->data; 1927 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1928 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1929 b->row = isrow; 1930 b->col = iscol; 1931 b->icol = isicol; 1932 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1933 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1934 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1935 PetscFunctionReturn(0); 1936 } 1937 1938 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1939 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1940 1941 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1942 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1943 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1944 bi[0] = bdiag[0] = 0; 1945 1946 ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr); 1947 1948 /* create a linked list for storing column indices of the active row */ 1949 nlnk = n + 1; 1950 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1951 1952 /* initial FreeSpace size is f*(ai[n]+1) */ 1953 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1954 current_space = free_space; 1955 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1956 current_space_lvl = free_space_lvl; 1957 1958 for (i=0; i<n; i++) { 1959 nzi = 0; 1960 /* copy current row into linked list */ 1961 nnz = ai[r[i]+1] - ai[r[i]]; 1962 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1963 cols = aj + ai[r[i]]; 1964 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1965 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1966 nzi += nlnk; 1967 1968 /* make sure diagonal entry is included */ 1969 if (diagonal_fill && lnk[i] == -1) { 1970 fm = n; 1971 while (lnk[fm] < i) fm = lnk[fm]; 1972 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1973 lnk[fm] = i; 1974 lnk_lvl[i] = 0; 1975 nzi++; dcount++; 1976 } 1977 1978 /* add pivot rows into the active row */ 1979 nzbd = 0; 1980 prow = lnk[n]; 1981 while (prow < i) { 1982 nnz = bdiag[prow]; 1983 cols = bj_ptr[prow] + nnz + 1; 1984 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1985 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1986 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1987 nzi += nlnk; 1988 prow = lnk[prow]; 1989 nzbd++; 1990 } 1991 bdiag[i] = nzbd; 1992 bi[i+1] = bi[i] + nzi; 1993 1994 /* if free space is not available, make more free space */ 1995 if (current_space->local_remaining<nzi) { 1996 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1997 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1998 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1999 reallocs++; 2000 } 2001 2002 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2003 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2004 bj_ptr[i] = current_space->array; 2005 bjlvl_ptr[i] = current_space_lvl->array; 2006 2007 /* make sure the active row i has diagonal entry */ 2008 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 2009 2010 current_space->array += nzi; 2011 current_space->local_used += nzi; 2012 current_space->local_remaining -= nzi; 2013 current_space_lvl->array += nzi; 2014 current_space_lvl->local_used += nzi; 2015 current_space_lvl->local_remaining -= nzi; 2016 } 2017 2018 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2019 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2020 2021 /* destroy list of free space and other temporary arrays */ 2022 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 2023 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */ 2024 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2025 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2026 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 2027 2028 #if defined(PETSC_USE_INFO) 2029 { 2030 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2031 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 2032 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2033 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 2034 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2035 if (diagonal_fill) { 2036 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 2037 } 2038 } 2039 #endif 2040 2041 /* put together the new matrix */ 2042 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 2043 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 2044 b = (Mat_SeqAIJ*)(fact)->data; 2045 b->free_a = PETSC_TRUE; 2046 b->free_ij = PETSC_TRUE; 2047 b->singlemalloc = PETSC_FALSE; 2048 ierr = PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 2049 b->j = bj; 2050 b->i = bi; 2051 for (i=0; i<n; i++) bdiag[i] += bi[i]; 2052 b->diag = bdiag; 2053 b->ilen = 0; 2054 b->imax = 0; 2055 b->row = isrow; 2056 b->col = iscol; 2057 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2058 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2059 b->icol = isicol; 2060 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2061 /* In b structure: Free imax, ilen, old a, old j. 2062 Allocate bdiag, solve_work, new a, new j */ 2063 ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 2064 b->maxnz = b->nz = bi[n] ; 2065 (fact)->info.factor_mallocs = reallocs; 2066 (fact)->info.fill_ratio_given = f; 2067 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2068 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2069 if (a->inode.size) { 2070 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2071 } 2072 PetscFunctionReturn(0); 2073 } 2074 2075 #undef __FUNCT__ 2076 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 2077 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 2078 { 2079 Mat C = B; 2080 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2081 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2082 IS ip=b->row,iip = b->icol; 2083 PetscErrorCode ierr; 2084 const PetscInt *rip,*riip; 2085 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 2086 PetscInt *ai=a->i,*aj=a->j; 2087 PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 2088 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2089 PetscBool perm_identity; 2090 FactorShiftCtx sctx; 2091 PetscReal rs; 2092 MatScalar d,*v; 2093 2094 PetscFunctionBegin; 2095 /* MatPivotSetUp(): initialize shift context sctx */ 2096 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2097 2098 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2099 sctx.shift_top = info->zeropivot; 2100 for (i=0; i<mbs; i++) { 2101 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2102 d = (aa)[a->diag[i]]; 2103 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2104 v = aa+ai[i]; 2105 nz = ai[i+1] - ai[i]; 2106 for (j=0; j<nz; j++) 2107 rs += PetscAbsScalar(v[j]); 2108 if (rs>sctx.shift_top) sctx.shift_top = rs; 2109 } 2110 sctx.shift_top *= 1.1; 2111 sctx.nshift_max = 5; 2112 sctx.shift_lo = 0.; 2113 sctx.shift_hi = 1.; 2114 } 2115 2116 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2117 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2118 2119 /* allocate working arrays 2120 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2121 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2122 */ 2123 ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);CHKERRQ(ierr); 2124 2125 do { 2126 sctx.newshift = PETSC_FALSE; 2127 2128 for (i=0; i<mbs; i++) c2r[i] = mbs; 2129 il[0] = 0; 2130 2131 for (k = 0; k<mbs; k++){ 2132 /* zero rtmp */ 2133 nz = bi[k+1] - bi[k]; 2134 bjtmp = bj + bi[k]; 2135 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2136 2137 /* load in initial unfactored row */ 2138 bval = ba + bi[k]; 2139 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2140 for (j = jmin; j < jmax; j++){ 2141 col = riip[aj[j]]; 2142 if (col >= k){ /* only take upper triangular entry */ 2143 rtmp[col] = aa[j]; 2144 *bval++ = 0.0; /* for in-place factorization */ 2145 } 2146 } 2147 /* shift the diagonal of the matrix: ZeropivotApply() */ 2148 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2149 2150 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2151 dk = rtmp[k]; 2152 i = c2r[k]; /* first row to be added to k_th row */ 2153 2154 while (i < k){ 2155 nexti = c2r[i]; /* next row to be added to k_th row */ 2156 2157 /* compute multiplier, update diag(k) and U(i,k) */ 2158 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2159 uikdi = - ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 2160 dk += uikdi*ba[ili]; /* update diag[k] */ 2161 ba[ili] = uikdi; /* -U(i,k) */ 2162 2163 /* add multiple of row i to k-th row */ 2164 jmin = ili + 1; jmax = bi[i+1]; 2165 if (jmin < jmax){ 2166 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2167 /* update il and c2r for row i */ 2168 il[i] = jmin; 2169 j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 2170 } 2171 i = nexti; 2172 } 2173 2174 /* copy data into U(k,:) */ 2175 rs = 0.0; 2176 jmin = bi[k]; jmax = bi[k+1]-1; 2177 if (jmin < jmax) { 2178 for (j=jmin; j<jmax; j++){ 2179 col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]); 2180 } 2181 /* add the k-th row into il and c2r */ 2182 il[k] = jmin; 2183 i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 2184 } 2185 2186 /* MatPivotCheck() */ 2187 sctx.rs = rs; 2188 sctx.pv = dk; 2189 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 2190 if(sctx.newshift) break; 2191 dk = sctx.pv; 2192 2193 ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 2194 } 2195 } while (sctx.newshift); 2196 2197 ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr); 2198 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2199 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2200 2201 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2202 if (perm_identity){ 2203 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2204 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2205 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2206 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2207 } else { 2208 B->ops->solve = MatSolve_SeqSBAIJ_1; 2209 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2210 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2211 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2212 } 2213 2214 C->assembled = PETSC_TRUE; 2215 C->preallocated = PETSC_TRUE; 2216 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2217 2218 /* MatPivotView() */ 2219 if (sctx.nshift){ 2220 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2221 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 2222 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2223 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2224 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){ 2225 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr); 2226 } 2227 } 2228 PetscFunctionReturn(0); 2229 } 2230 2231 #undef __FUNCT__ 2232 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace" 2233 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 2234 { 2235 Mat C = B; 2236 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2237 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2238 IS ip=b->row,iip = b->icol; 2239 PetscErrorCode ierr; 2240 const PetscInt *rip,*riip; 2241 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp; 2242 PetscInt *ai=a->i,*aj=a->j; 2243 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 2244 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2245 PetscBool perm_identity; 2246 FactorShiftCtx sctx; 2247 PetscReal rs; 2248 MatScalar d,*v; 2249 2250 PetscFunctionBegin; 2251 /* MatPivotSetUp(): initialize shift context sctx */ 2252 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2253 2254 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2255 sctx.shift_top = info->zeropivot; 2256 for (i=0; i<mbs; i++) { 2257 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2258 d = (aa)[a->diag[i]]; 2259 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2260 v = aa+ai[i]; 2261 nz = ai[i+1] - ai[i]; 2262 for (j=0; j<nz; j++) 2263 rs += PetscAbsScalar(v[j]); 2264 if (rs>sctx.shift_top) sctx.shift_top = rs; 2265 } 2266 sctx.shift_top *= 1.1; 2267 sctx.nshift_max = 5; 2268 sctx.shift_lo = 0.; 2269 sctx.shift_hi = 1.; 2270 } 2271 2272 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2273 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2274 2275 /* initialization */ 2276 ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); 2277 2278 do { 2279 sctx.newshift = PETSC_FALSE; 2280 2281 for (i=0; i<mbs; i++) jl[i] = mbs; 2282 il[0] = 0; 2283 2284 for (k = 0; k<mbs; k++){ 2285 /* zero rtmp */ 2286 nz = bi[k+1] - bi[k]; 2287 bjtmp = bj + bi[k]; 2288 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2289 2290 bval = ba + bi[k]; 2291 /* initialize k-th row by the perm[k]-th row of A */ 2292 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2293 for (j = jmin; j < jmax; j++){ 2294 col = riip[aj[j]]; 2295 if (col >= k){ /* only take upper triangular entry */ 2296 rtmp[col] = aa[j]; 2297 *bval++ = 0.0; /* for in-place factorization */ 2298 } 2299 } 2300 /* shift the diagonal of the matrix */ 2301 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2302 2303 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2304 dk = rtmp[k]; 2305 i = jl[k]; /* first row to be added to k_th row */ 2306 2307 while (i < k){ 2308 nexti = jl[i]; /* next row to be added to k_th row */ 2309 2310 /* compute multiplier, update diag(k) and U(i,k) */ 2311 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2312 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 2313 dk += uikdi*ba[ili]; 2314 ba[ili] = uikdi; /* -U(i,k) */ 2315 2316 /* add multiple of row i to k-th row */ 2317 jmin = ili + 1; jmax = bi[i+1]; 2318 if (jmin < jmax){ 2319 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2320 /* update il and jl for row i */ 2321 il[i] = jmin; 2322 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 2323 } 2324 i = nexti; 2325 } 2326 2327 /* shift the diagonals when zero pivot is detected */ 2328 /* compute rs=sum of abs(off-diagonal) */ 2329 rs = 0.0; 2330 jmin = bi[k]+1; 2331 nz = bi[k+1] - jmin; 2332 bcol = bj + jmin; 2333 for (j=0; j<nz; j++) { 2334 rs += PetscAbsScalar(rtmp[bcol[j]]); 2335 } 2336 2337 sctx.rs = rs; 2338 sctx.pv = dk; 2339 ierr = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr); 2340 if (sctx.newshift) break; 2341 dk = sctx.pv; 2342 2343 /* copy data into U(k,:) */ 2344 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 2345 jmin = bi[k]+1; jmax = bi[k+1]; 2346 if (jmin < jmax) { 2347 for (j=jmin; j<jmax; j++){ 2348 col = bj[j]; ba[j] = rtmp[col]; 2349 } 2350 /* add the k-th row into il and jl */ 2351 il[k] = jmin; 2352 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 2353 } 2354 } 2355 } while (sctx.newshift); 2356 2357 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 2358 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2359 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2360 2361 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2362 if (perm_identity){ 2363 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2364 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2365 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2366 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2367 } else { 2368 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2369 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2370 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2371 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2372 } 2373 2374 C->assembled = PETSC_TRUE; 2375 C->preallocated = PETSC_TRUE; 2376 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2377 if (sctx.nshift){ 2378 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2379 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2380 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2381 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2382 } 2383 } 2384 PetscFunctionReturn(0); 2385 } 2386 2387 /* 2388 icc() under revised new data structure. 2389 Factored arrays bj and ba are stored as 2390 U(0,:),...,U(i,:),U(n-1,:) 2391 2392 ui=fact->i is an array of size n+1, in which 2393 ui+ 2394 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2395 ui[n]: points to U(n-1,n-1)+1 2396 2397 udiag=fact->diag is an array of size n,in which 2398 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2399 2400 U(i,:) contains udiag[i] as its last entry, i.e., 2401 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2402 */ 2403 2404 #undef __FUNCT__ 2405 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 2406 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2407 { 2408 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2409 Mat_SeqSBAIJ *b; 2410 PetscErrorCode ierr; 2411 PetscBool perm_identity,missing; 2412 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2413 const PetscInt *rip,*riip; 2414 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2415 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 2416 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2417 PetscReal fill=info->fill,levels=info->levels; 2418 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2419 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2420 PetscBT lnkbt; 2421 IS iperm; 2422 2423 PetscFunctionBegin; 2424 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2425 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2426 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2427 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2428 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2429 2430 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2431 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2432 ui[0] = 0; 2433 2434 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2435 if (!levels && perm_identity) { 2436 for (i=0; i<am; i++) { 2437 ncols = ai[i+1] - a->diag[i]; 2438 ui[i+1] = ui[i] + ncols; 2439 udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 2440 } 2441 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2442 cols = uj; 2443 for (i=0; i<am; i++) { 2444 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2445 ncols = ai[i+1] - a->diag[i] -1; 2446 for (j=0; j<ncols; j++) *cols++ = aj[j]; 2447 *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */ 2448 } 2449 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2450 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2451 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2452 2453 /* initialization */ 2454 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 2455 2456 /* jl: linked list for storing indices of the pivot rows 2457 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2458 ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr); 2459 for (i=0; i<am; i++){ 2460 jl[i] = am; il[i] = 0; 2461 } 2462 2463 /* create and initialize a linked list for storing column indices of the active row k */ 2464 nlnk = am + 1; 2465 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2466 2467 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2468 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2469 current_space = free_space; 2470 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr); 2471 current_space_lvl = free_space_lvl; 2472 2473 for (k=0; k<am; k++){ /* for each active row k */ 2474 /* initialize lnk by the column indices of row rip[k] of A */ 2475 nzk = 0; 2476 ncols = ai[rip[k]+1] - ai[rip[k]]; 2477 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2478 ncols_upper = 0; 2479 for (j=0; j<ncols; j++){ 2480 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2481 if (riip[i] >= k){ /* only take upper triangular entry */ 2482 ajtmp[ncols_upper] = i; 2483 ncols_upper++; 2484 } 2485 } 2486 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2487 nzk += nlnk; 2488 2489 /* update lnk by computing fill-in for each pivot row to be merged in */ 2490 prow = jl[k]; /* 1st pivot row */ 2491 2492 while (prow < k){ 2493 nextprow = jl[prow]; 2494 2495 /* merge prow into k-th row */ 2496 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2497 jmax = ui[prow+1]; 2498 ncols = jmax-jmin; 2499 i = jmin - ui[prow]; 2500 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2501 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2502 j = *(uj - 1); 2503 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2504 nzk += nlnk; 2505 2506 /* update il and jl for prow */ 2507 if (jmin < jmax){ 2508 il[prow] = jmin; 2509 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2510 } 2511 prow = nextprow; 2512 } 2513 2514 /* if free space is not available, make more free space */ 2515 if (current_space->local_remaining<nzk) { 2516 i = am - k + 1; /* num of unfactored rows */ 2517 i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2518 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2519 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2520 reallocs++; 2521 } 2522 2523 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2524 if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2525 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2526 2527 /* add the k-th row into il and jl */ 2528 if (nzk > 1){ 2529 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2530 jl[k] = jl[i]; jl[i] = k; 2531 il[k] = ui[k] + 1; 2532 } 2533 uj_ptr[k] = current_space->array; 2534 uj_lvl_ptr[k] = current_space_lvl->array; 2535 2536 current_space->array += nzk; 2537 current_space->local_used += nzk; 2538 current_space->local_remaining -= nzk; 2539 2540 current_space_lvl->array += nzk; 2541 current_space_lvl->local_used += nzk; 2542 current_space_lvl->local_remaining -= nzk; 2543 2544 ui[k+1] = ui[k] + nzk; 2545 } 2546 2547 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2548 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2549 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2550 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2551 2552 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2553 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2554 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2555 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2556 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2557 2558 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2559 2560 /* put together the new matrix in MATSEQSBAIJ format */ 2561 b = (Mat_SeqSBAIJ*)(fact)->data; 2562 b->singlemalloc = PETSC_FALSE; 2563 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2564 b->j = uj; 2565 b->i = ui; 2566 b->diag = udiag; 2567 b->free_diag = PETSC_TRUE; 2568 b->ilen = 0; 2569 b->imax = 0; 2570 b->row = perm; 2571 b->col = perm; 2572 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2573 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2574 b->icol = iperm; 2575 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2576 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2577 ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2578 b->maxnz = b->nz = ui[am]; 2579 b->free_a = PETSC_TRUE; 2580 b->free_ij = PETSC_TRUE; 2581 2582 fact->info.factor_mallocs = reallocs; 2583 fact->info.fill_ratio_given = fill; 2584 if (ai[am] != 0) { 2585 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2586 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2587 } else { 2588 fact->info.fill_ratio_needed = 0.0; 2589 } 2590 #if defined(PETSC_USE_INFO) 2591 if (ai[am] != 0) { 2592 PetscReal af = fact->info.fill_ratio_needed; 2593 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2594 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2595 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2596 } else { 2597 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2598 } 2599 #endif 2600 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2601 PetscFunctionReturn(0); 2602 } 2603 2604 #undef __FUNCT__ 2605 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace" 2606 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2607 { 2608 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2609 Mat_SeqSBAIJ *b; 2610 PetscErrorCode ierr; 2611 PetscBool perm_identity,missing; 2612 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2613 const PetscInt *rip,*riip; 2614 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2615 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 2616 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2617 PetscReal fill=info->fill,levels=info->levels; 2618 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2619 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2620 PetscBT lnkbt; 2621 IS iperm; 2622 2623 PetscFunctionBegin; 2624 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2625 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2626 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2627 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2628 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2629 2630 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2631 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2632 ui[0] = 0; 2633 2634 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2635 if (!levels && perm_identity) { 2636 2637 for (i=0; i<am; i++) { 2638 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 2639 udiag[i] = ui[i]; 2640 } 2641 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2642 cols = uj; 2643 for (i=0; i<am; i++) { 2644 aj = a->j + a->diag[i]; 2645 ncols = ui[i+1] - ui[i]; 2646 for (j=0; j<ncols; j++) *cols++ = *aj++; 2647 } 2648 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2649 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2650 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2651 2652 /* initialization */ 2653 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 2654 2655 /* jl: linked list for storing indices of the pivot rows 2656 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2657 ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr); 2658 for (i=0; i<am; i++){ 2659 jl[i] = am; il[i] = 0; 2660 } 2661 2662 /* create and initialize a linked list for storing column indices of the active row k */ 2663 nlnk = am + 1; 2664 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2665 2666 /* initial FreeSpace size is fill*(ai[am]+1) */ 2667 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2668 current_space = free_space; 2669 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2670 current_space_lvl = free_space_lvl; 2671 2672 for (k=0; k<am; k++){ /* for each active row k */ 2673 /* initialize lnk by the column indices of row rip[k] of A */ 2674 nzk = 0; 2675 ncols = ai[rip[k]+1] - ai[rip[k]]; 2676 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2677 ncols_upper = 0; 2678 for (j=0; j<ncols; j++){ 2679 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2680 if (riip[i] >= k){ /* only take upper triangular entry */ 2681 ajtmp[ncols_upper] = i; 2682 ncols_upper++; 2683 } 2684 } 2685 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2686 nzk += nlnk; 2687 2688 /* update lnk by computing fill-in for each pivot row to be merged in */ 2689 prow = jl[k]; /* 1st pivot row */ 2690 2691 while (prow < k){ 2692 nextprow = jl[prow]; 2693 2694 /* merge prow into k-th row */ 2695 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2696 jmax = ui[prow+1]; 2697 ncols = jmax-jmin; 2698 i = jmin - ui[prow]; 2699 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2700 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2701 j = *(uj - 1); 2702 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2703 nzk += nlnk; 2704 2705 /* update il and jl for prow */ 2706 if (jmin < jmax){ 2707 il[prow] = jmin; 2708 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2709 } 2710 prow = nextprow; 2711 } 2712 2713 /* if free space is not available, make more free space */ 2714 if (current_space->local_remaining<nzk) { 2715 i = am - k + 1; /* num of unfactored rows */ 2716 i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2717 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2718 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2719 reallocs++; 2720 } 2721 2722 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2723 if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2724 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2725 2726 /* add the k-th row into il and jl */ 2727 if (nzk > 1){ 2728 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2729 jl[k] = jl[i]; jl[i] = k; 2730 il[k] = ui[k] + 1; 2731 } 2732 uj_ptr[k] = current_space->array; 2733 uj_lvl_ptr[k] = current_space_lvl->array; 2734 2735 current_space->array += nzk; 2736 current_space->local_used += nzk; 2737 current_space->local_remaining -= nzk; 2738 2739 current_space_lvl->array += nzk; 2740 current_space_lvl->local_used += nzk; 2741 current_space_lvl->local_remaining -= nzk; 2742 2743 ui[k+1] = ui[k] + nzk; 2744 } 2745 2746 #if defined(PETSC_USE_INFO) 2747 if (ai[am] != 0) { 2748 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2749 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2750 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2751 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2752 } else { 2753 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2754 } 2755 #endif 2756 2757 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2758 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2759 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2760 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2761 2762 /* destroy list of free space and other temporary array(s) */ 2763 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2764 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2765 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2766 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2767 2768 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2769 2770 /* put together the new matrix in MATSEQSBAIJ format */ 2771 2772 b = (Mat_SeqSBAIJ*)fact->data; 2773 b->singlemalloc = PETSC_FALSE; 2774 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2775 b->j = uj; 2776 b->i = ui; 2777 b->diag = udiag; 2778 b->free_diag = PETSC_TRUE; 2779 b->ilen = 0; 2780 b->imax = 0; 2781 b->row = perm; 2782 b->col = perm; 2783 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2784 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2785 b->icol = iperm; 2786 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2787 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2788 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2789 b->maxnz = b->nz = ui[am]; 2790 b->free_a = PETSC_TRUE; 2791 b->free_ij = PETSC_TRUE; 2792 2793 fact->info.factor_mallocs = reallocs; 2794 fact->info.fill_ratio_given = fill; 2795 if (ai[am] != 0) { 2796 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2797 } else { 2798 fact->info.fill_ratio_needed = 0.0; 2799 } 2800 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2801 PetscFunctionReturn(0); 2802 } 2803 2804 #undef __FUNCT__ 2805 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 2806 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2807 { 2808 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2809 Mat_SeqSBAIJ *b; 2810 PetscErrorCode ierr; 2811 PetscBool perm_identity; 2812 PetscReal fill = info->fill; 2813 const PetscInt *rip,*riip; 2814 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2815 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2816 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag; 2817 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2818 PetscBT lnkbt; 2819 IS iperm; 2820 2821 PetscFunctionBegin; 2822 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2823 /* check whether perm is the identity mapping */ 2824 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2825 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2826 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2827 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2828 2829 /* initialization */ 2830 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2831 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2832 ui[0] = 0; 2833 2834 /* jl: linked list for storing indices of the pivot rows 2835 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2836 ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr); 2837 for (i=0; i<am; i++){ 2838 jl[i] = am; il[i] = 0; 2839 } 2840 2841 /* create and initialize a linked list for storing column indices of the active row k */ 2842 nlnk = am + 1; 2843 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2844 2845 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2846 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2847 current_space = free_space; 2848 2849 for (k=0; k<am; k++){ /* for each active row k */ 2850 /* initialize lnk by the column indices of row rip[k] of A */ 2851 nzk = 0; 2852 ncols = ai[rip[k]+1] - ai[rip[k]]; 2853 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2854 ncols_upper = 0; 2855 for (j=0; j<ncols; j++){ 2856 i = riip[*(aj + ai[rip[k]] + j)]; 2857 if (i >= k){ /* only take upper triangular entry */ 2858 cols[ncols_upper] = i; 2859 ncols_upper++; 2860 } 2861 } 2862 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2863 nzk += nlnk; 2864 2865 /* update lnk by computing fill-in for each pivot row to be merged in */ 2866 prow = jl[k]; /* 1st pivot row */ 2867 2868 while (prow < k){ 2869 nextprow = jl[prow]; 2870 /* merge prow into k-th row */ 2871 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2872 jmax = ui[prow+1]; 2873 ncols = jmax-jmin; 2874 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2875 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2876 nzk += nlnk; 2877 2878 /* update il and jl for prow */ 2879 if (jmin < jmax){ 2880 il[prow] = jmin; 2881 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 2882 } 2883 prow = nextprow; 2884 } 2885 2886 /* if free space is not available, make more free space */ 2887 if (current_space->local_remaining<nzk) { 2888 i = am - k + 1; /* num of unfactored rows */ 2889 i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2890 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2891 reallocs++; 2892 } 2893 2894 /* copy data into free space, then initialize lnk */ 2895 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 2896 2897 /* add the k-th row into il and jl */ 2898 if (nzk > 1){ 2899 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2900 jl[k] = jl[i]; jl[i] = k; 2901 il[k] = ui[k] + 1; 2902 } 2903 ui_ptr[k] = current_space->array; 2904 current_space->array += nzk; 2905 current_space->local_used += nzk; 2906 current_space->local_remaining -= nzk; 2907 2908 ui[k+1] = ui[k] + nzk; 2909 } 2910 2911 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2912 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2913 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 2914 2915 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2916 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2917 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2918 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2919 2920 /* put together the new matrix in MATSEQSBAIJ format */ 2921 2922 b = (Mat_SeqSBAIJ*)fact->data; 2923 b->singlemalloc = PETSC_FALSE; 2924 b->free_a = PETSC_TRUE; 2925 b->free_ij = PETSC_TRUE; 2926 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2927 b->j = uj; 2928 b->i = ui; 2929 b->diag = udiag; 2930 b->free_diag = PETSC_TRUE; 2931 b->ilen = 0; 2932 b->imax = 0; 2933 b->row = perm; 2934 b->col = perm; 2935 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2936 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2937 b->icol = iperm; 2938 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2939 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2940 ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2941 b->maxnz = b->nz = ui[am]; 2942 2943 fact->info.factor_mallocs = reallocs; 2944 fact->info.fill_ratio_given = fill; 2945 if (ai[am] != 0) { 2946 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2947 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2948 } else { 2949 fact->info.fill_ratio_needed = 0.0; 2950 } 2951 #if defined(PETSC_USE_INFO) 2952 if (ai[am] != 0) { 2953 PetscReal af = fact->info.fill_ratio_needed; 2954 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2955 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2956 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2957 } else { 2958 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2959 } 2960 #endif 2961 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2962 PetscFunctionReturn(0); 2963 } 2964 2965 #undef __FUNCT__ 2966 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace" 2967 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2968 { 2969 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2970 Mat_SeqSBAIJ *b; 2971 PetscErrorCode ierr; 2972 PetscBool perm_identity; 2973 PetscReal fill = info->fill; 2974 const PetscInt *rip,*riip; 2975 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2976 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2977 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 2978 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2979 PetscBT lnkbt; 2980 IS iperm; 2981 2982 PetscFunctionBegin; 2983 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2984 /* check whether perm is the identity mapping */ 2985 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2986 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2987 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2988 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2989 2990 /* initialization */ 2991 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2992 ui[0] = 0; 2993 2994 /* jl: linked list for storing indices of the pivot rows 2995 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2996 ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr); 2997 for (i=0; i<am; i++){ 2998 jl[i] = am; il[i] = 0; 2999 } 3000 3001 /* create and initialize a linked list for storing column indices of the active row k */ 3002 nlnk = am + 1; 3003 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3004 3005 /* initial FreeSpace size is fill*(ai[am]+1) */ 3006 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 3007 current_space = free_space; 3008 3009 for (k=0; k<am; k++){ /* for each active row k */ 3010 /* initialize lnk by the column indices of row rip[k] of A */ 3011 nzk = 0; 3012 ncols = ai[rip[k]+1] - ai[rip[k]]; 3013 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 3014 ncols_upper = 0; 3015 for (j=0; j<ncols; j++){ 3016 i = riip[*(aj + ai[rip[k]] + j)]; 3017 if (i >= k){ /* only take upper triangular entry */ 3018 cols[ncols_upper] = i; 3019 ncols_upper++; 3020 } 3021 } 3022 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3023 nzk += nlnk; 3024 3025 /* update lnk by computing fill-in for each pivot row to be merged in */ 3026 prow = jl[k]; /* 1st pivot row */ 3027 3028 while (prow < k){ 3029 nextprow = jl[prow]; 3030 /* merge prow into k-th row */ 3031 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3032 jmax = ui[prow+1]; 3033 ncols = jmax-jmin; 3034 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3035 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3036 nzk += nlnk; 3037 3038 /* update il and jl for prow */ 3039 if (jmin < jmax){ 3040 il[prow] = jmin; 3041 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 3042 } 3043 prow = nextprow; 3044 } 3045 3046 /* if free space is not available, make more free space */ 3047 if (current_space->local_remaining<nzk) { 3048 i = am - k + 1; /* num of unfactored rows */ 3049 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3050 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 3051 reallocs++; 3052 } 3053 3054 /* copy data into free space, then initialize lnk */ 3055 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3056 3057 /* add the k-th row into il and jl */ 3058 if (nzk-1 > 0){ 3059 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3060 jl[k] = jl[i]; jl[i] = k; 3061 il[k] = ui[k] + 1; 3062 } 3063 ui_ptr[k] = current_space->array; 3064 current_space->array += nzk; 3065 current_space->local_used += nzk; 3066 current_space->local_remaining -= nzk; 3067 3068 ui[k+1] = ui[k] + nzk; 3069 } 3070 3071 #if defined(PETSC_USE_INFO) 3072 if (ai[am] != 0) { 3073 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 3074 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 3075 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 3076 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 3077 } else { 3078 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 3079 } 3080 #endif 3081 3082 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 3083 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 3084 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 3085 3086 /* destroy list of free space and other temporary array(s) */ 3087 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 3088 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 3089 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3090 3091 /* put together the new matrix in MATSEQSBAIJ format */ 3092 3093 b = (Mat_SeqSBAIJ*)fact->data; 3094 b->singlemalloc = PETSC_FALSE; 3095 b->free_a = PETSC_TRUE; 3096 b->free_ij = PETSC_TRUE; 3097 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 3098 b->j = uj; 3099 b->i = ui; 3100 b->diag = 0; 3101 b->ilen = 0; 3102 b->imax = 0; 3103 b->row = perm; 3104 b->col = perm; 3105 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3106 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3107 b->icol = iperm; 3108 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3109 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3110 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3111 b->maxnz = b->nz = ui[am]; 3112 3113 fact->info.factor_mallocs = reallocs; 3114 fact->info.fill_ratio_given = fill; 3115 if (ai[am] != 0) { 3116 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 3117 } else { 3118 fact->info.fill_ratio_needed = 0.0; 3119 } 3120 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3121 PetscFunctionReturn(0); 3122 } 3123 3124 #undef __FUNCT__ 3125 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 3126 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 3127 { 3128 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3129 PetscErrorCode ierr; 3130 PetscInt n = A->rmap->n; 3131 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 3132 PetscScalar *x,sum; 3133 const PetscScalar *b; 3134 const MatScalar *aa = a->a,*v; 3135 PetscInt i,nz; 3136 3137 PetscFunctionBegin; 3138 if (!n) PetscFunctionReturn(0); 3139 3140 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3141 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3142 3143 /* forward solve the lower triangular */ 3144 x[0] = b[0]; 3145 v = aa; 3146 vi = aj; 3147 for (i=1; i<n; i++) { 3148 nz = ai[i+1] - ai[i]; 3149 sum = b[i]; 3150 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3151 v += nz; 3152 vi += nz; 3153 x[i] = sum; 3154 } 3155 3156 /* backward solve the upper triangular */ 3157 for (i=n-1; i>=0; i--){ 3158 v = aa + adiag[i+1] + 1; 3159 vi = aj + adiag[i+1] + 1; 3160 nz = adiag[i] - adiag[i+1]-1; 3161 sum = x[i]; 3162 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3163 x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3164 } 3165 3166 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 3167 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3168 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3169 PetscFunctionReturn(0); 3170 } 3171 3172 #undef __FUNCT__ 3173 #define __FUNCT__ "MatSolve_SeqAIJ" 3174 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 3175 { 3176 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3177 IS iscol = a->col,isrow = a->row; 3178 PetscErrorCode ierr; 3179 PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz; 3180 const PetscInt *rout,*cout,*r,*c; 3181 PetscScalar *x,*tmp,sum; 3182 const PetscScalar *b; 3183 const MatScalar *aa = a->a,*v; 3184 3185 PetscFunctionBegin; 3186 if (!n) PetscFunctionReturn(0); 3187 3188 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3189 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3190 tmp = a->solve_work; 3191 3192 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 3193 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 3194 3195 /* forward solve the lower triangular */ 3196 tmp[0] = b[r[0]]; 3197 v = aa; 3198 vi = aj; 3199 for (i=1; i<n; i++) { 3200 nz = ai[i+1] - ai[i]; 3201 sum = b[r[i]]; 3202 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3203 tmp[i] = sum; 3204 v += nz; vi += nz; 3205 } 3206 3207 /* backward solve the upper triangular */ 3208 for (i=n-1; i>=0; i--){ 3209 v = aa + adiag[i+1]+1; 3210 vi = aj + adiag[i+1]+1; 3211 nz = adiag[i]-adiag[i+1]-1; 3212 sum = tmp[i]; 3213 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3214 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 3215 } 3216 3217 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 3218 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 3219 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3220 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3221 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 3222 PetscFunctionReturn(0); 3223 } 3224 3225 #undef __FUNCT__ 3226 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 3227 /* 3228 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3229 */ 3230 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 3231 { 3232 Mat B = *fact; 3233 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 3234 IS isicol; 3235 PetscErrorCode ierr; 3236 const PetscInt *r,*ic; 3237 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 3238 PetscInt *bi,*bj,*bdiag,*bdiag_rev; 3239 PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au; 3240 PetscInt nlnk,*lnk; 3241 PetscBT lnkbt; 3242 PetscBool row_identity,icol_identity; 3243 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp; 3244 const PetscInt *ics; 3245 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 3246 PetscReal dt=info->dt,dtcol=info->dtcol,shift=info->shiftamount; 3247 PetscInt dtcount=(PetscInt)info->dtcount,nnz_max; 3248 PetscBool missing; 3249 3250 PetscFunctionBegin; 3251 3252 if (dt == PETSC_DEFAULT) dt = 0.005; 3253 if (dtcol == PETSC_DEFAULT) dtcol = 0.01; /* XXX unused! */ 3254 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); 3255 3256 /* ------- symbolic factorization, can be reused ---------*/ 3257 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 3258 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 3259 adiag=a->diag; 3260 3261 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 3262 3263 /* bdiag is location of diagonal in factor */ 3264 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); /* becomes b->diag */ 3265 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev);CHKERRQ(ierr); /* temporary */ 3266 3267 /* allocate row pointers bi */ 3268 ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3269 3270 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3271 if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */ 3272 nnz_max = ai[n]+2*n*dtcount+2; 3273 3274 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3275 ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr); 3276 3277 /* put together the new matrix */ 3278 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 3279 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 3280 b = (Mat_SeqAIJ*)B->data; 3281 b->free_a = PETSC_TRUE; 3282 b->free_ij = PETSC_TRUE; 3283 b->singlemalloc = PETSC_FALSE; 3284 b->a = ba; 3285 b->j = bj; 3286 b->i = bi; 3287 b->diag = bdiag; 3288 b->ilen = 0; 3289 b->imax = 0; 3290 b->row = isrow; 3291 b->col = iscol; 3292 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3293 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3294 b->icol = isicol; 3295 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3296 3297 ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3298 b->maxnz = nnz_max; 3299 3300 B->factortype = MAT_FACTOR_ILUDT; 3301 B->info.factor_mallocs = 0; 3302 B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 3303 CHKMEMQ; 3304 /* ------- end of symbolic factorization ---------*/ 3305 3306 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3307 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3308 ics = ic; 3309 3310 /* linked list for storing column indices of the active row */ 3311 nlnk = n + 1; 3312 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3313 3314 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3315 ierr = PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);CHKERRQ(ierr); 3316 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3317 ierr = PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);CHKERRQ(ierr); 3318 ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr); 3319 3320 bi[0] = 0; 3321 bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */ 3322 bdiag_rev[n] = bdiag[0]; 3323 bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */ 3324 for (i=0; i<n; i++) { 3325 /* copy initial fill into linked list */ 3326 nzi = 0; /* nonzeros for active row i */ 3327 nzi = ai[r[i]+1] - ai[r[i]]; 3328 if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 3329 nzi_al = adiag[r[i]] - ai[r[i]]; 3330 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 3331 ajtmp = aj + ai[r[i]]; 3332 ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3333 3334 /* load in initial (unfactored row) */ 3335 aatmp = a->a + ai[r[i]]; 3336 for (j=0; j<nzi; j++) { 3337 rtmp[ics[*ajtmp++]] = *aatmp++; 3338 } 3339 3340 /* add pivot rows into linked list */ 3341 row = lnk[n]; 3342 while (row < i ) { 3343 nzi_bl = bi[row+1] - bi[row] + 1; 3344 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 3345 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 3346 nzi += nlnk; 3347 row = lnk[row]; 3348 } 3349 3350 /* copy data from lnk into jtmp, then initialize lnk */ 3351 ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 3352 3353 /* numerical factorization */ 3354 bjtmp = jtmp; 3355 row = *bjtmp++; /* 1st pivot row */ 3356 while ( row < i ) { 3357 pc = rtmp + row; 3358 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3359 multiplier = (*pc) * (*pv); 3360 *pc = multiplier; 3361 if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */ 3362 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3363 pv = ba + bdiag[row+1] + 1; 3364 /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */ 3365 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3366 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3367 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3368 } 3369 row = *bjtmp++; 3370 } 3371 3372 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3373 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3374 nzi_bl = 0; j = 0; 3375 while (jtmp[j] < i){ /* Note: jtmp is sorted */ 3376 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3377 nzi_bl++; j++; 3378 } 3379 nzi_bu = nzi - nzi_bl -1; 3380 while (j < nzi){ 3381 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3382 j++; 3383 } 3384 3385 bjtmp = bj + bi[i]; 3386 batmp = ba + bi[i]; 3387 /* apply level dropping rule to L part */ 3388 ncut = nzi_al + dtcount; 3389 if (ncut < nzi_bl){ 3390 ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 3391 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 3392 } else { 3393 ncut = nzi_bl; 3394 } 3395 for (j=0; j<ncut; j++){ 3396 bjtmp[j] = jtmp[j]; 3397 batmp[j] = vtmp[j]; 3398 /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */ 3399 } 3400 bi[i+1] = bi[i] + ncut; 3401 nzi = ncut + 1; 3402 3403 /* apply level dropping rule to U part */ 3404 ncut = nzi_au + dtcount; 3405 if (ncut < nzi_bu){ 3406 ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 3407 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 3408 } else { 3409 ncut = nzi_bu; 3410 } 3411 nzi += ncut; 3412 3413 /* mark bdiagonal */ 3414 bdiag[i+1] = bdiag[i] - (ncut + 1); 3415 bdiag_rev[n-i-1] = bdiag[i+1]; 3416 bi[2*n - i] = bi[2*n - i +1] - (ncut + 1); 3417 bjtmp = bj + bdiag[i]; 3418 batmp = ba + bdiag[i]; 3419 *bjtmp = i; 3420 *batmp = diag_tmp; /* rtmp[i]; */ 3421 if (*batmp == 0.0) { 3422 *batmp = dt+shift; 3423 /* printf(" row %d add shift %g\n",i,shift); */ 3424 } 3425 *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */ 3426 /* printf(" (%d,%g),",*bjtmp,*batmp); */ 3427 3428 bjtmp = bj + bdiag[i+1]+1; 3429 batmp = ba + bdiag[i+1]+1; 3430 for (k=0; k<ncut; k++){ 3431 bjtmp[k] = jtmp[nzi_bl+1+k]; 3432 batmp[k] = vtmp[nzi_bl+1+k]; 3433 /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */ 3434 } 3435 /* printf("\n"); */ 3436 3437 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3438 /* 3439 printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]); 3440 printf(" ----------------------------\n"); 3441 */ 3442 } /* for (i=0; i<n; i++) */ 3443 /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */ 3444 if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 3445 3446 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3447 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3448 3449 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3450 ierr = PetscFree2(im,jtmp);CHKERRQ(ierr); 3451 ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr); 3452 ierr = PetscFree(bdiag_rev);CHKERRQ(ierr); 3453 3454 ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 3455 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3456 3457 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3458 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 3459 if (row_identity && icol_identity) { 3460 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3461 } else { 3462 B->ops->solve = MatSolve_SeqAIJ; 3463 } 3464 3465 B->ops->solveadd = 0; 3466 B->ops->solvetranspose = 0; 3467 B->ops->solvetransposeadd = 0; 3468 B->ops->matsolve = 0; 3469 B->assembled = PETSC_TRUE; 3470 B->preallocated = PETSC_TRUE; 3471 PetscFunctionReturn(0); 3472 } 3473 3474 /* a wraper of MatILUDTFactor_SeqAIJ() */ 3475 #undef __FUNCT__ 3476 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ" 3477 /* 3478 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3479 */ 3480 3481 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 3482 { 3483 PetscErrorCode ierr; 3484 3485 PetscFunctionBegin; 3486 ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr); 3487 PetscFunctionReturn(0); 3488 } 3489 3490 /* 3491 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3492 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3493 */ 3494 #undef __FUNCT__ 3495 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ" 3496 /* 3497 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3498 */ 3499 3500 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 3501 { 3502 Mat C=fact; 3503 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 3504 IS isrow = b->row,isicol = b->icol; 3505 PetscErrorCode ierr; 3506 const PetscInt *r,*ic,*ics; 3507 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 3508 PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 3509 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 3510 PetscReal dt=info->dt,shift=info->shiftamount; 3511 PetscBool row_identity, col_identity; 3512 3513 PetscFunctionBegin; 3514 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3515 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3516 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 3517 ics = ic; 3518 3519 for (i=0; i<n; i++){ 3520 /* initialize rtmp array */ 3521 nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 3522 bjtmp = bj + bi[i]; 3523 for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 3524 rtmp[i] = 0.0; 3525 nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 3526 bjtmp = bj + bdiag[i+1] + 1; 3527 for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 3528 3529 /* load in initial unfactored row of A */ 3530 /* printf("row %d\n",i); */ 3531 nz = ai[r[i]+1] - ai[r[i]]; 3532 ajtmp = aj + ai[r[i]]; 3533 v = aa + ai[r[i]]; 3534 for (j=0; j<nz; j++) { 3535 rtmp[ics[*ajtmp++]] = v[j]; 3536 /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */ 3537 } 3538 /* printf("\n"); */ 3539 3540 /* numerical factorization */ 3541 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3542 nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 3543 k = 0; 3544 while (k < nzl){ 3545 row = *bjtmp++; 3546 /* printf(" prow %d\n",row); */ 3547 pc = rtmp + row; 3548 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3549 multiplier = (*pc) * (*pv); 3550 *pc = multiplier; 3551 if (PetscAbsScalar(multiplier) > dt){ 3552 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3553 pv = b->a + bdiag[row+1] + 1; 3554 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3555 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3556 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3557 } 3558 k++; 3559 } 3560 3561 /* finished row so stick it into b->a */ 3562 /* L-part */ 3563 pv = b->a + bi[i] ; 3564 pj = bj + bi[i] ; 3565 nzl = bi[i+1] - bi[i]; 3566 for (j=0; j<nzl; j++) { 3567 pv[j] = rtmp[pj[j]]; 3568 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3569 } 3570 3571 /* diagonal: invert diagonal entries for simplier triangular solves */ 3572 if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 3573 b->a[bdiag[i]] = 1.0/rtmp[i]; 3574 /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */ 3575 3576 /* U-part */ 3577 pv = b->a + bdiag[i+1] + 1; 3578 pj = bj + bdiag[i+1] + 1; 3579 nzu = bdiag[i] - bdiag[i+1] - 1; 3580 for (j=0; j<nzu; j++) { 3581 pv[j] = rtmp[pj[j]]; 3582 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3583 } 3584 /* printf("\n"); */ 3585 } 3586 3587 ierr = PetscFree(rtmp);CHKERRQ(ierr); 3588 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3589 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3590 3591 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3592 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 3593 if (row_identity && col_identity) { 3594 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3595 } else { 3596 C->ops->solve = MatSolve_SeqAIJ; 3597 } 3598 C->ops->solveadd = 0; 3599 C->ops->solvetranspose = 0; 3600 C->ops->solvetransposeadd = 0; 3601 C->ops->matsolve = 0; 3602 C->assembled = PETSC_TRUE; 3603 C->preallocated = PETSC_TRUE; 3604 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 3605 PetscFunctionReturn(0); 3606 } 3607