1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscbt.h> 5 #include <../src/mat/utils/freespace.h> 6 7 EXTERN_C_BEGIN 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ" 10 /* 11 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 12 13 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 14 */ 15 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 16 { 17 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data; 18 PetscErrorCode ierr; 19 PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order; 20 const PetscInt *ai = a->i, *aj = a->j; 21 const PetscScalar *aa = a->a; 22 PetscBool *done; 23 PetscReal best,past = 0,future; 24 25 PetscFunctionBegin; 26 /* pick initial row */ 27 best = -1; 28 for (i=0; i<n; i++) { 29 future = 0.0; 30 for (j=ai[i]; j<ai[i+1]; j++) { 31 if (aj[j] != i) future += PetscAbsScalar(aa[j]); else past = PetscAbsScalar(aa[j]); 32 } 33 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 34 if (past/future > best) { 35 best = past/future; 36 current = i; 37 } 38 } 39 40 ierr = PetscMalloc(n*sizeof(PetscBool),&done);CHKERRQ(ierr); 41 ierr = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr); 42 ierr = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr); 43 order[0] = current; 44 for (i=0; i<n-1; i++) { 45 done[current] = PETSC_TRUE; 46 best = -1; 47 /* loop over all neighbors of current pivot */ 48 for (j=ai[current]; j<ai[current+1]; j++) { 49 jj = aj[j]; 50 if (done[jj]) continue; 51 /* loop over columns of potential next row computing weights for below and above diagonal */ 52 past = future = 0.0; 53 for (k=ai[jj]; k<ai[jj+1]; k++) { 54 kk = aj[k]; 55 if (done[kk]) past += PetscAbsScalar(aa[k]); 56 else if (kk != jj) future += PetscAbsScalar(aa[k]); 57 } 58 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 59 if (past/future > best) { 60 best = past/future; 61 newcurrent = jj; 62 } 63 } 64 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 65 best = -1; 66 for (k=0; k<n; k++) { 67 if (done[k]) continue; 68 future = 0.0; 69 past = 0.0; 70 for (j=ai[k]; j<ai[k+1]; j++) { 71 kk = aj[j]; 72 if (done[kk]) past += PetscAbsScalar(aa[j]); 73 else if (kk != k) future += PetscAbsScalar(aa[j]); 74 } 75 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 76 if (past/future > best) { 77 best = past/future; 78 newcurrent = k; 79 } 80 } 81 } 82 if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current"); 83 current = newcurrent; 84 order[i+1] = current; 85 } 86 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr); 87 *icol = *irow; 88 ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr); 89 ierr = PetscFree(done);CHKERRQ(ierr); 90 ierr = PetscFree(order);CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 EXTERN_C_END 94 95 EXTERN_C_BEGIN 96 #undef __FUNCT__ 97 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 98 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool *flg) 99 { 100 PetscFunctionBegin; 101 *flg = PETSC_TRUE; 102 PetscFunctionReturn(0); 103 } 104 EXTERN_C_END 105 106 EXTERN_C_BEGIN 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 109 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 110 { 111 PetscInt n = A->rmap->n; 112 PetscErrorCode ierr; 113 114 PetscFunctionBegin; 115 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 116 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 117 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){ 118 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 119 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 120 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 121 ierr = MatSetBlockSizes(*B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 122 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 123 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 124 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 125 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 126 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 127 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 128 (*B)->factortype = ftype; 129 PetscFunctionReturn(0); 130 } 131 EXTERN_C_END 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace" 135 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 136 { 137 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 138 IS isicol; 139 PetscErrorCode ierr; 140 const PetscInt *r,*ic; 141 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 142 PetscInt *bi,*bj,*ajtmp; 143 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 144 PetscReal f; 145 PetscInt nlnk,*lnk,k,**bi_ptr; 146 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 147 PetscBT lnkbt; 148 149 PetscFunctionBegin; 150 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 151 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 152 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 153 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 154 155 /* get new row pointers */ 156 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 157 bi[0] = 0; 158 159 /* bdiag is location of diagonal in factor */ 160 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 161 bdiag[0] = 0; 162 163 /* linked list for storing column indices of the active row */ 164 nlnk = n + 1; 165 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 166 167 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 168 169 /* initial FreeSpace size is f*(ai[n]+1) */ 170 f = info->fill; 171 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 172 current_space = free_space; 173 174 for (i=0; i<n; i++) { 175 /* copy previous fill into linked list */ 176 nzi = 0; 177 nnz = ai[r[i]+1] - ai[r[i]]; 178 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 179 ajtmp = aj + ai[r[i]]; 180 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 181 nzi += nlnk; 182 183 /* add pivot rows into linked list */ 184 row = lnk[n]; 185 while (row < i) { 186 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 187 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 188 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 189 nzi += nlnk; 190 row = lnk[row]; 191 } 192 bi[i+1] = bi[i] + nzi; 193 im[i] = nzi; 194 195 /* mark bdiag */ 196 nzbd = 0; 197 nnz = nzi; 198 k = lnk[n]; 199 while (nnz-- && k < i){ 200 nzbd++; 201 k = lnk[k]; 202 } 203 bdiag[i] = bi[i] + nzbd; 204 205 /* if free space is not available, make more free space */ 206 if (current_space->local_remaining<nzi) { 207 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 208 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 209 reallocs++; 210 } 211 212 /* copy data into free space, then initialize lnk */ 213 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 214 bi_ptr[i] = current_space->array; 215 current_space->array += nzi; 216 current_space->local_used += nzi; 217 current_space->local_remaining -= nzi; 218 } 219 #if defined(PETSC_USE_INFO) 220 if (ai[n] != 0) { 221 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 222 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 223 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 224 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 225 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 226 } else { 227 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 228 } 229 #endif 230 231 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 232 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 233 234 /* destroy list of free space and other temporary array(s) */ 235 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 236 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 237 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 238 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 239 240 /* put together the new matrix */ 241 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 242 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 243 b = (Mat_SeqAIJ*)(B)->data; 244 b->free_a = PETSC_TRUE; 245 b->free_ij = PETSC_TRUE; 246 b->singlemalloc = PETSC_FALSE; 247 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 248 b->j = bj; 249 b->i = bi; 250 b->diag = bdiag; 251 b->ilen = 0; 252 b->imax = 0; 253 b->row = isrow; 254 b->col = iscol; 255 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 256 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 257 b->icol = isicol; 258 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 259 260 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 261 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 262 b->maxnz = b->nz = bi[n] ; 263 264 (B)->factortype = MAT_FACTOR_LU; 265 (B)->info.factor_mallocs = reallocs; 266 (B)->info.fill_ratio_given = f; 267 268 if (ai[n]) { 269 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 270 } else { 271 (B)->info.fill_ratio_needed = 0.0; 272 } 273 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 274 if (a->inode.size) { 275 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 276 } 277 PetscFunctionReturn(0); 278 } 279 280 #undef __FUNCT__ 281 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 282 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 283 { 284 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 285 IS isicol; 286 PetscErrorCode ierr; 287 const PetscInt *r,*ic; 288 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 289 PetscInt *bi,*bj,*ajtmp; 290 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 291 PetscReal f; 292 PetscInt nlnk,*lnk,k,**bi_ptr; 293 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 294 PetscBT lnkbt; 295 296 PetscFunctionBegin; 297 /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */ 298 /* 299 PetscBool olddatastruct=PETSC_FALSE; 300 ierr = PetscOptionsGetBool(PETSC_NULL,"-lu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr); 301 if(olddatastruct){ 302 ierr = MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);CHKERRQ(ierr); 303 PetscFunctionReturn(0); 304 } 305 */ 306 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 307 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 308 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 309 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 310 311 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 312 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 313 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 314 bi[0] = bdiag[0] = 0; 315 316 /* linked list for storing column indices of the active row */ 317 nlnk = n + 1; 318 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 319 320 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 321 322 /* initial FreeSpace size is f*(ai[n]+1) */ 323 f = info->fill; 324 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 325 current_space = free_space; 326 327 for (i=0; i<n; i++) { 328 /* copy previous fill into linked list */ 329 nzi = 0; 330 nnz = ai[r[i]+1] - ai[r[i]]; 331 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 332 ajtmp = aj + ai[r[i]]; 333 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 334 nzi += nlnk; 335 336 /* add pivot rows into linked list */ 337 row = lnk[n]; 338 while (row < i){ 339 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 340 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 341 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 342 nzi += nlnk; 343 row = lnk[row]; 344 } 345 bi[i+1] = bi[i] + nzi; 346 im[i] = nzi; 347 348 /* mark bdiag */ 349 nzbd = 0; 350 nnz = nzi; 351 k = lnk[n]; 352 while (nnz-- && k < i){ 353 nzbd++; 354 k = lnk[k]; 355 } 356 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 357 358 /* if free space is not available, make more free space */ 359 if (current_space->local_remaining<nzi) { 360 nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */ 361 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 362 reallocs++; 363 } 364 365 /* copy data into free space, then initialize lnk */ 366 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 367 bi_ptr[i] = current_space->array; 368 current_space->array += nzi; 369 current_space->local_used += nzi; 370 current_space->local_remaining -= nzi; 371 } 372 373 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 374 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 375 376 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 377 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 378 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 379 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 380 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 381 382 /* put together the new matrix */ 383 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 384 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 385 b = (Mat_SeqAIJ*)(B)->data; 386 b->free_a = PETSC_TRUE; 387 b->free_ij = PETSC_TRUE; 388 b->singlemalloc = PETSC_FALSE; 389 ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 390 b->j = bj; 391 b->i = bi; 392 b->diag = bdiag; 393 b->ilen = 0; 394 b->imax = 0; 395 b->row = isrow; 396 b->col = iscol; 397 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 398 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 399 b->icol = isicol; 400 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 401 402 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 403 ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 404 b->maxnz = b->nz = bdiag[0]+1; 405 B->factortype = MAT_FACTOR_LU; 406 B->info.factor_mallocs = reallocs; 407 B->info.fill_ratio_given = f; 408 409 if (ai[n]) { 410 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 411 } else { 412 B->info.fill_ratio_needed = 0.0; 413 } 414 #if defined(PETSC_USE_INFO) 415 if (ai[n] != 0) { 416 PetscReal af = B->info.fill_ratio_needed; 417 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 418 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 419 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 420 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 421 } else { 422 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 423 } 424 #endif 425 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 426 if (a->inode.size) { 427 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 428 } 429 PetscFunctionReturn(0); 430 } 431 432 /* 433 Trouble in factorization, should we dump the original matrix? 434 */ 435 #undef __FUNCT__ 436 #define __FUNCT__ "MatFactorDumpMatrix" 437 PetscErrorCode MatFactorDumpMatrix(Mat A) 438 { 439 PetscErrorCode ierr; 440 PetscBool flg = PETSC_FALSE; 441 442 PetscFunctionBegin; 443 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr); 444 if (flg) { 445 PetscViewer viewer; 446 char filename[PETSC_MAX_PATH_LEN]; 447 448 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 449 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 450 ierr = MatView(A,viewer);CHKERRQ(ierr); 451 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 452 } 453 PetscFunctionReturn(0); 454 } 455 456 #undef __FUNCT__ 457 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 458 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 459 { 460 Mat C=B; 461 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 462 IS isrow = b->row,isicol = b->icol; 463 PetscErrorCode ierr; 464 const PetscInt *r,*ic,*ics; 465 const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag; 466 PetscInt i,j,k,nz,nzL,row,*pj; 467 const PetscInt *ajtmp,*bjtmp; 468 MatScalar *rtmp,*pc,multiplier,*pv; 469 const MatScalar *aa=a->a,*v; 470 PetscBool row_identity,col_identity; 471 FactorShiftCtx sctx; 472 const PetscInt *ddiag; 473 PetscReal rs; 474 MatScalar d; 475 476 PetscFunctionBegin; 477 /* MatPivotSetUp(): initialize shift context sctx */ 478 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 479 480 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 481 ddiag = a->diag; 482 sctx.shift_top = info->zeropivot; 483 for (i=0; i<n; i++) { 484 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 485 d = (aa)[ddiag[i]]; 486 rs = -PetscAbsScalar(d) - PetscRealPart(d); 487 v = aa+ai[i]; 488 nz = ai[i+1] - ai[i]; 489 for (j=0; j<nz; j++) 490 rs += PetscAbsScalar(v[j]); 491 if (rs>sctx.shift_top) sctx.shift_top = rs; 492 } 493 sctx.shift_top *= 1.1; 494 sctx.nshift_max = 5; 495 sctx.shift_lo = 0.; 496 sctx.shift_hi = 1.; 497 } 498 499 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 500 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 501 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 502 ics = ic; 503 504 do { 505 sctx.newshift = PETSC_FALSE; 506 for (i=0; i<n; i++){ 507 /* zero rtmp */ 508 /* L part */ 509 nz = bi[i+1] - bi[i]; 510 bjtmp = bj + bi[i]; 511 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 512 513 /* U part */ 514 nz = bdiag[i]-bdiag[i+1]; 515 bjtmp = bj + bdiag[i+1]+1; 516 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 517 518 /* load in initial (unfactored row) */ 519 nz = ai[r[i]+1] - ai[r[i]]; 520 ajtmp = aj + ai[r[i]]; 521 v = aa + ai[r[i]]; 522 for (j=0; j<nz; j++) { 523 rtmp[ics[ajtmp[j]]] = v[j]; 524 } 525 /* ZeropivotApply() */ 526 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 527 528 /* elimination */ 529 bjtmp = bj + bi[i]; 530 row = *bjtmp++; 531 nzL = bi[i+1] - bi[i]; 532 for(k=0; k < nzL;k++) { 533 pc = rtmp + row; 534 if (*pc != 0.0) { 535 pv = b->a + bdiag[row]; 536 multiplier = *pc * (*pv); 537 *pc = multiplier; 538 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 539 pv = b->a + bdiag[row+1]+1; 540 nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */ 541 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 542 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 543 } 544 row = *bjtmp++; 545 } 546 547 /* finished row so stick it into b->a */ 548 rs = 0.0; 549 /* L part */ 550 pv = b->a + bi[i] ; 551 pj = b->j + bi[i] ; 552 nz = bi[i+1] - bi[i]; 553 for (j=0; j<nz; j++) { 554 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 555 } 556 557 /* U part */ 558 pv = b->a + bdiag[i+1]+1; 559 pj = b->j + bdiag[i+1]+1; 560 nz = bdiag[i] - bdiag[i+1]-1; 561 for (j=0; j<nz; j++) { 562 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 563 } 564 565 sctx.rs = rs; 566 sctx.pv = rtmp[i]; 567 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 568 if(sctx.newshift) break; /* break for-loop */ 569 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 570 571 /* Mark diagonal and invert diagonal for simplier triangular solves */ 572 pv = b->a + bdiag[i]; 573 *pv = 1.0/rtmp[i]; 574 575 } /* endof for (i=0; i<n; i++){ */ 576 577 /* MatPivotRefine() */ 578 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max){ 579 /* 580 * if no shift in this attempt & shifting & started shifting & can refine, 581 * then try lower shift 582 */ 583 sctx.shift_hi = sctx.shift_fraction; 584 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 585 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 586 sctx.newshift = PETSC_TRUE; 587 sctx.nshift++; 588 } 589 } while (sctx.newshift); 590 591 ierr = PetscFree(rtmp);CHKERRQ(ierr); 592 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 593 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 594 595 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 596 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 597 if (row_identity && col_identity) { 598 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 599 } else { 600 C->ops->solve = MatSolve_SeqAIJ; 601 } 602 C->ops->solveadd = MatSolveAdd_SeqAIJ; 603 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 604 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 605 C->ops->matsolve = MatMatSolve_SeqAIJ; 606 C->assembled = PETSC_TRUE; 607 C->preallocated = PETSC_TRUE; 608 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 609 610 /* MatShiftView(A,info,&sctx) */ 611 if (sctx.nshift){ 612 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 613 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 614 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 615 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 616 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){ 617 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr); 618 } 619 } 620 ierr = Mat_CheckInode_FactorLU(C,PETSC_FALSE);CHKERRQ(ierr); 621 PetscFunctionReturn(0); 622 } 623 624 #undef __FUNCT__ 625 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace" 626 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 627 { 628 Mat C=B; 629 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 630 IS isrow = b->row,isicol = b->icol; 631 PetscErrorCode ierr; 632 const PetscInt *r,*ic,*ics; 633 PetscInt nz,row,i,j,n=A->rmap->n,diag; 634 const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 635 const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj; 636 MatScalar *pv,*rtmp,*pc,multiplier,d; 637 const MatScalar *v,*aa=a->a; 638 PetscReal rs=0.0; 639 FactorShiftCtx sctx; 640 const PetscInt *ddiag; 641 PetscBool row_identity, col_identity; 642 643 PetscFunctionBegin; 644 /* MatPivotSetUp(): initialize shift context sctx */ 645 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 646 647 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 648 ddiag = a->diag; 649 sctx.shift_top = info->zeropivot; 650 for (i=0; i<n; i++) { 651 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 652 d = (aa)[ddiag[i]]; 653 rs = -PetscAbsScalar(d) - PetscRealPart(d); 654 v = aa+ai[i]; 655 nz = ai[i+1] - ai[i]; 656 for (j=0; j<nz; j++) 657 rs += PetscAbsScalar(v[j]); 658 if (rs>sctx.shift_top) sctx.shift_top = rs; 659 } 660 sctx.shift_top *= 1.1; 661 sctx.nshift_max = 5; 662 sctx.shift_lo = 0.; 663 sctx.shift_hi = 1.; 664 } 665 666 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 667 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 668 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 669 ics = ic; 670 671 do { 672 sctx.newshift = PETSC_FALSE; 673 for (i=0; i<n; i++){ 674 nz = bi[i+1] - bi[i]; 675 bjtmp = bj + bi[i]; 676 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 677 678 /* load in initial (unfactored row) */ 679 nz = ai[r[i]+1] - ai[r[i]]; 680 ajtmp = aj + ai[r[i]]; 681 v = aa + ai[r[i]]; 682 for (j=0; j<nz; j++) { 683 rtmp[ics[ajtmp[j]]] = v[j]; 684 } 685 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 686 687 row = *bjtmp++; 688 while (row < i) { 689 pc = rtmp + row; 690 if (*pc != 0.0) { 691 pv = b->a + diag_offset[row]; 692 pj = b->j + diag_offset[row] + 1; 693 multiplier = *pc / *pv++; 694 *pc = multiplier; 695 nz = bi[row+1] - diag_offset[row] - 1; 696 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 697 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 698 } 699 row = *bjtmp++; 700 } 701 /* finished row so stick it into b->a */ 702 pv = b->a + bi[i] ; 703 pj = b->j + bi[i] ; 704 nz = bi[i+1] - bi[i]; 705 diag = diag_offset[i] - bi[i]; 706 rs = 0.0; 707 for (j=0; j<nz; j++) { 708 pv[j] = rtmp[pj[j]]; 709 rs += PetscAbsScalar(pv[j]); 710 } 711 rs -= PetscAbsScalar(pv[diag]); 712 713 sctx.rs = rs; 714 sctx.pv = pv[diag]; 715 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 716 if (sctx.newshift) break; 717 pv[diag] = sctx.pv; 718 } 719 720 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 721 /* 722 * if no shift in this attempt & shifting & started shifting & can refine, 723 * then try lower shift 724 */ 725 sctx.shift_hi = sctx.shift_fraction; 726 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 727 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 728 sctx.newshift = PETSC_TRUE; 729 sctx.nshift++; 730 } 731 } while (sctx.newshift); 732 733 /* invert diagonal entries for simplier triangular solves */ 734 for (i=0; i<n; i++) { 735 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 736 } 737 ierr = PetscFree(rtmp);CHKERRQ(ierr); 738 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 739 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 740 741 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 742 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 743 if (row_identity && col_identity) { 744 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 745 } else { 746 C->ops->solve = MatSolve_SeqAIJ_inplace; 747 } 748 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 749 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 750 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 751 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 752 C->assembled = PETSC_TRUE; 753 C->preallocated = PETSC_TRUE; 754 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 755 if (sctx.nshift){ 756 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 757 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 758 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 759 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 760 } 761 } 762 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 763 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 764 ierr = Mat_CheckInode(C,PETSC_FALSE);CHKERRQ(ierr); 765 PetscFunctionReturn(0); 766 } 767 768 /* 769 This routine implements inplace ILU(0) with row or/and column permutations. 770 Input: 771 A - original matrix 772 Output; 773 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 774 a->j (col index) is permuted by the inverse of colperm, then sorted 775 a->a reordered accordingly with a->j 776 a->diag (ptr to diagonal elements) is updated. 777 */ 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 780 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 781 { 782 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 783 IS isrow = a->row,isicol = a->icol; 784 PetscErrorCode ierr; 785 const PetscInt *r,*ic,*ics; 786 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 787 PetscInt *ajtmp,nz,row; 788 PetscInt *diag = a->diag,nbdiag,*pj; 789 PetscScalar *rtmp,*pc,multiplier,d; 790 MatScalar *pv,*v; 791 PetscReal rs; 792 FactorShiftCtx sctx; 793 const MatScalar *aa=a->a,*vtmp; 794 795 PetscFunctionBegin; 796 if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 797 798 /* MatPivotSetUp(): initialize shift context sctx */ 799 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 800 801 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 802 const PetscInt *ddiag = a->diag; 803 sctx.shift_top = info->zeropivot; 804 for (i=0; i<n; i++) { 805 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 806 d = (aa)[ddiag[i]]; 807 rs = -PetscAbsScalar(d) - PetscRealPart(d); 808 vtmp = aa+ai[i]; 809 nz = ai[i+1] - ai[i]; 810 for (j=0; j<nz; j++) 811 rs += PetscAbsScalar(vtmp[j]); 812 if (rs>sctx.shift_top) sctx.shift_top = rs; 813 } 814 sctx.shift_top *= 1.1; 815 sctx.nshift_max = 5; 816 sctx.shift_lo = 0.; 817 sctx.shift_hi = 1.; 818 } 819 820 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 821 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 822 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 823 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 824 ics = ic; 825 826 #if defined(MV) 827 sctx.shift_top = 0.; 828 sctx.nshift_max = 0; 829 sctx.shift_lo = 0.; 830 sctx.shift_hi = 0.; 831 sctx.shift_fraction = 0.; 832 833 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 834 sctx.shift_top = 0.; 835 for (i=0; i<n; i++) { 836 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 837 d = (a->a)[diag[i]]; 838 rs = -PetscAbsScalar(d) - PetscRealPart(d); 839 v = a->a+ai[i]; 840 nz = ai[i+1] - ai[i]; 841 for (j=0; j<nz; j++) 842 rs += PetscAbsScalar(v[j]); 843 if (rs>sctx.shift_top) sctx.shift_top = rs; 844 } 845 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 846 sctx.shift_top *= 1.1; 847 sctx.nshift_max = 5; 848 sctx.shift_lo = 0.; 849 sctx.shift_hi = 1.; 850 } 851 852 sctx.shift_amount = 0.; 853 sctx.nshift = 0; 854 #endif 855 856 do { 857 sctx.newshift = PETSC_FALSE; 858 for (i=0; i<n; i++){ 859 /* load in initial unfactored row */ 860 nz = ai[r[i]+1] - ai[r[i]]; 861 ajtmp = aj + ai[r[i]]; 862 v = a->a + ai[r[i]]; 863 /* sort permuted ajtmp and values v accordingly */ 864 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 865 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 866 867 diag[r[i]] = ai[r[i]]; 868 for (j=0; j<nz; j++) { 869 rtmp[ajtmp[j]] = v[j]; 870 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 871 } 872 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 873 874 row = *ajtmp++; 875 while (row < i) { 876 pc = rtmp + row; 877 if (*pc != 0.0) { 878 pv = a->a + diag[r[row]]; 879 pj = aj + diag[r[row]] + 1; 880 881 multiplier = *pc / *pv++; 882 *pc = multiplier; 883 nz = ai[r[row]+1] - diag[r[row]] - 1; 884 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 885 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 886 } 887 row = *ajtmp++; 888 } 889 /* finished row so overwrite it onto a->a */ 890 pv = a->a + ai[r[i]] ; 891 pj = aj + ai[r[i]] ; 892 nz = ai[r[i]+1] - ai[r[i]]; 893 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 894 895 rs = 0.0; 896 for (j=0; j<nz; j++) { 897 pv[j] = rtmp[pj[j]]; 898 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 899 } 900 901 sctx.rs = rs; 902 sctx.pv = pv[nbdiag]; 903 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 904 if (sctx.newshift) break; 905 pv[nbdiag] = sctx.pv; 906 } 907 908 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 909 /* 910 * if no shift in this attempt & shifting & started shifting & can refine, 911 * then try lower shift 912 */ 913 sctx.shift_hi = sctx.shift_fraction; 914 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 915 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 916 sctx.newshift = PETSC_TRUE; 917 sctx.nshift++; 918 } 919 } while (sctx.newshift); 920 921 /* invert diagonal entries for simplier triangular solves */ 922 for (i=0; i<n; i++) { 923 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 924 } 925 926 ierr = PetscFree(rtmp);CHKERRQ(ierr); 927 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 928 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 929 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 930 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 931 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 932 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 933 A->assembled = PETSC_TRUE; 934 A->preallocated = PETSC_TRUE; 935 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 936 if (sctx.nshift){ 937 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 938 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 939 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 940 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 941 } 942 } 943 PetscFunctionReturn(0); 944 } 945 946 /* ----------------------------------------------------------- */ 947 #undef __FUNCT__ 948 #define __FUNCT__ "MatLUFactor_SeqAIJ" 949 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 950 { 951 PetscErrorCode ierr; 952 Mat C; 953 954 PetscFunctionBegin; 955 ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 956 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 957 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 958 A->ops->solve = C->ops->solve; 959 A->ops->solvetranspose = C->ops->solvetranspose; 960 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 961 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 962 PetscFunctionReturn(0); 963 } 964 /* ----------------------------------------------------------- */ 965 966 967 #undef __FUNCT__ 968 #define __FUNCT__ "MatSolve_SeqAIJ_inplace" 969 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 970 { 971 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 972 IS iscol = a->col,isrow = a->row; 973 PetscErrorCode ierr; 974 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 975 PetscInt nz; 976 const PetscInt *rout,*cout,*r,*c; 977 PetscScalar *x,*tmp,*tmps,sum; 978 const PetscScalar *b; 979 const MatScalar *aa = a->a,*v; 980 981 PetscFunctionBegin; 982 if (!n) PetscFunctionReturn(0); 983 984 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 985 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 986 tmp = a->solve_work; 987 988 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 989 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 990 991 /* forward solve the lower triangular */ 992 tmp[0] = b[*r++]; 993 tmps = tmp; 994 for (i=1; i<n; i++) { 995 v = aa + ai[i] ; 996 vi = aj + ai[i] ; 997 nz = a->diag[i] - ai[i]; 998 sum = b[*r++]; 999 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1000 tmp[i] = sum; 1001 } 1002 1003 /* backward solve the upper triangular */ 1004 for (i=n-1; i>=0; i--){ 1005 v = aa + a->diag[i] + 1; 1006 vi = aj + a->diag[i] + 1; 1007 nz = ai[i+1] - a->diag[i] - 1; 1008 sum = tmp[i]; 1009 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1010 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 1011 } 1012 1013 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1014 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1015 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1016 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1017 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1018 PetscFunctionReturn(0); 1019 } 1020 1021 #undef __FUNCT__ 1022 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace" 1023 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X) 1024 { 1025 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1026 IS iscol = a->col,isrow = a->row; 1027 PetscErrorCode ierr; 1028 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1029 PetscInt nz,neq; 1030 const PetscInt *rout,*cout,*r,*c; 1031 PetscScalar *x,*b,*tmp,*tmps,sum; 1032 const MatScalar *aa = a->a,*v; 1033 PetscBool bisdense,xisdense; 1034 1035 PetscFunctionBegin; 1036 if (!n) PetscFunctionReturn(0); 1037 1038 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1039 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1040 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1041 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1042 1043 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 1044 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 1045 1046 tmp = a->solve_work; 1047 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1048 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1049 1050 for (neq=0; neq<B->cmap->n; neq++){ 1051 /* forward solve the lower triangular */ 1052 tmp[0] = b[r[0]]; 1053 tmps = tmp; 1054 for (i=1; i<n; i++) { 1055 v = aa + ai[i] ; 1056 vi = aj + ai[i] ; 1057 nz = a->diag[i] - ai[i]; 1058 sum = b[r[i]]; 1059 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1060 tmp[i] = sum; 1061 } 1062 /* backward solve the upper triangular */ 1063 for (i=n-1; i>=0; i--){ 1064 v = aa + a->diag[i] + 1; 1065 vi = aj + a->diag[i] + 1; 1066 nz = ai[i+1] - a->diag[i] - 1; 1067 sum = tmp[i]; 1068 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1069 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 1070 } 1071 1072 b += n; 1073 x += n; 1074 } 1075 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1076 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1077 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1078 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 1079 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1080 PetscFunctionReturn(0); 1081 } 1082 1083 #undef __FUNCT__ 1084 #define __FUNCT__ "MatMatSolve_SeqAIJ" 1085 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 1086 { 1087 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1088 IS iscol = a->col,isrow = a->row; 1089 PetscErrorCode ierr; 1090 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1091 PetscInt nz,neq; 1092 const PetscInt *rout,*cout,*r,*c; 1093 PetscScalar *x,*b,*tmp,sum; 1094 const MatScalar *aa = a->a,*v; 1095 PetscBool bisdense,xisdense; 1096 1097 PetscFunctionBegin; 1098 if (!n) PetscFunctionReturn(0); 1099 1100 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1101 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1102 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1103 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1104 1105 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 1106 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 1107 1108 tmp = a->solve_work; 1109 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1110 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1111 1112 for (neq=0; neq<B->cmap->n; neq++){ 1113 /* forward solve the lower triangular */ 1114 tmp[0] = b[r[0]]; 1115 v = aa; 1116 vi = aj; 1117 for (i=1; i<n; i++) { 1118 nz = ai[i+1] - ai[i]; 1119 sum = b[r[i]]; 1120 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1121 tmp[i] = sum; 1122 v += nz; vi += nz; 1123 } 1124 1125 /* backward solve the upper triangular */ 1126 for (i=n-1; i>=0; i--){ 1127 v = aa + adiag[i+1]+1; 1128 vi = aj + adiag[i+1]+1; 1129 nz = adiag[i]-adiag[i+1]-1; 1130 sum = tmp[i]; 1131 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1132 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 1133 } 1134 1135 b += n; 1136 x += n; 1137 } 1138 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1139 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1140 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1141 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 1142 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 1148 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 1149 { 1150 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1151 IS iscol = a->col,isrow = a->row; 1152 PetscErrorCode ierr; 1153 const PetscInt *r,*c,*rout,*cout; 1154 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1155 PetscInt nz,row; 1156 PetscScalar *x,*b,*tmp,*tmps,sum; 1157 const MatScalar *aa = a->a,*v; 1158 1159 PetscFunctionBegin; 1160 if (!n) PetscFunctionReturn(0); 1161 1162 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1163 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1164 tmp = a->solve_work; 1165 1166 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1167 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1168 1169 /* forward solve the lower triangular */ 1170 tmp[0] = b[*r++]; 1171 tmps = tmp; 1172 for (row=1; row<n; row++) { 1173 i = rout[row]; /* permuted row */ 1174 v = aa + ai[i] ; 1175 vi = aj + ai[i] ; 1176 nz = a->diag[i] - ai[i]; 1177 sum = b[*r++]; 1178 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1179 tmp[row] = sum; 1180 } 1181 1182 /* backward solve the upper triangular */ 1183 for (row=n-1; row>=0; row--){ 1184 i = rout[row]; /* permuted row */ 1185 v = aa + a->diag[i] + 1; 1186 vi = aj + a->diag[i] + 1; 1187 nz = ai[i+1] - a->diag[i] - 1; 1188 sum = tmp[row]; 1189 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1190 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 1191 } 1192 1193 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1194 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1195 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1196 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1197 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1198 PetscFunctionReturn(0); 1199 } 1200 1201 /* ----------------------------------------------------------- */ 1202 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1203 #undef __FUNCT__ 1204 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace" 1205 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1206 { 1207 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1208 PetscErrorCode ierr; 1209 PetscInt n = A->rmap->n; 1210 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag; 1211 PetscScalar *x; 1212 const PetscScalar *b; 1213 const MatScalar *aa = a->a; 1214 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1215 PetscInt adiag_i,i,nz,ai_i; 1216 const PetscInt *vi; 1217 const MatScalar *v; 1218 PetscScalar sum; 1219 #endif 1220 1221 PetscFunctionBegin; 1222 if (!n) PetscFunctionReturn(0); 1223 1224 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1225 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1226 1227 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1228 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 1229 #else 1230 /* forward solve the lower triangular */ 1231 x[0] = b[0]; 1232 for (i=1; i<n; i++) { 1233 ai_i = ai[i]; 1234 v = aa + ai_i; 1235 vi = aj + ai_i; 1236 nz = adiag[i] - ai_i; 1237 sum = b[i]; 1238 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1239 x[i] = sum; 1240 } 1241 1242 /* backward solve the upper triangular */ 1243 for (i=n-1; i>=0; i--){ 1244 adiag_i = adiag[i]; 1245 v = aa + adiag_i + 1; 1246 vi = aj + adiag_i + 1; 1247 nz = ai[i+1] - adiag_i - 1; 1248 sum = x[i]; 1249 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1250 x[i] = sum*aa[adiag_i]; 1251 } 1252 #endif 1253 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1254 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1255 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1256 PetscFunctionReturn(0); 1257 } 1258 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace" 1261 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx) 1262 { 1263 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1264 IS iscol = a->col,isrow = a->row; 1265 PetscErrorCode ierr; 1266 PetscInt i, n = A->rmap->n,j; 1267 PetscInt nz; 1268 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j; 1269 PetscScalar *x,*tmp,sum; 1270 const PetscScalar *b; 1271 const MatScalar *aa = a->a,*v; 1272 1273 PetscFunctionBegin; 1274 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1275 1276 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1277 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1278 tmp = a->solve_work; 1279 1280 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1281 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1282 1283 /* forward solve the lower triangular */ 1284 tmp[0] = b[*r++]; 1285 for (i=1; i<n; i++) { 1286 v = aa + ai[i] ; 1287 vi = aj + ai[i] ; 1288 nz = a->diag[i] - ai[i]; 1289 sum = b[*r++]; 1290 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1291 tmp[i] = sum; 1292 } 1293 1294 /* backward solve the upper triangular */ 1295 for (i=n-1; i>=0; i--){ 1296 v = aa + a->diag[i] + 1; 1297 vi = aj + a->diag[i] + 1; 1298 nz = ai[i+1] - a->diag[i] - 1; 1299 sum = tmp[i]; 1300 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1301 tmp[i] = sum*aa[a->diag[i]]; 1302 x[*c--] += tmp[i]; 1303 } 1304 1305 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1306 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1307 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1308 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1309 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1310 1311 PetscFunctionReturn(0); 1312 } 1313 1314 #undef __FUNCT__ 1315 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1316 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1317 { 1318 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1319 IS iscol = a->col,isrow = a->row; 1320 PetscErrorCode ierr; 1321 PetscInt i, n = A->rmap->n,j; 1322 PetscInt nz; 1323 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1324 PetscScalar *x,*tmp,sum; 1325 const PetscScalar *b; 1326 const MatScalar *aa = a->a,*v; 1327 1328 PetscFunctionBegin; 1329 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1330 1331 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1332 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1333 tmp = a->solve_work; 1334 1335 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1336 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1337 1338 /* forward solve the lower triangular */ 1339 tmp[0] = b[r[0]]; 1340 v = aa; 1341 vi = aj; 1342 for (i=1; i<n; i++) { 1343 nz = ai[i+1] - ai[i]; 1344 sum = b[r[i]]; 1345 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1346 tmp[i] = sum; 1347 v += nz; vi += nz; 1348 } 1349 1350 /* backward solve the upper triangular */ 1351 v = aa + adiag[n-1]; 1352 vi = aj + adiag[n-1]; 1353 for (i=n-1; i>=0; i--){ 1354 nz = adiag[i] - adiag[i+1] - 1; 1355 sum = tmp[i]; 1356 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1357 tmp[i] = sum*v[nz]; 1358 x[c[i]] += tmp[i]; 1359 v += nz+1; vi += nz+1; 1360 } 1361 1362 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1363 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1364 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1365 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1366 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1367 1368 PetscFunctionReturn(0); 1369 } 1370 1371 #undef __FUNCT__ 1372 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace" 1373 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 1374 { 1375 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1376 IS iscol = a->col,isrow = a->row; 1377 PetscErrorCode ierr; 1378 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1379 PetscInt i,n = A->rmap->n,j; 1380 PetscInt nz; 1381 PetscScalar *x,*tmp,s1; 1382 const MatScalar *aa = a->a,*v; 1383 const PetscScalar *b; 1384 1385 PetscFunctionBegin; 1386 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1387 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1388 tmp = a->solve_work; 1389 1390 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1391 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1392 1393 /* copy the b into temp work space according to permutation */ 1394 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1395 1396 /* forward solve the U^T */ 1397 for (i=0; i<n; i++) { 1398 v = aa + diag[i] ; 1399 vi = aj + diag[i] + 1; 1400 nz = ai[i+1] - diag[i] - 1; 1401 s1 = tmp[i]; 1402 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1403 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1404 tmp[i] = s1; 1405 } 1406 1407 /* backward solve the L^T */ 1408 for (i=n-1; i>=0; i--){ 1409 v = aa + diag[i] - 1 ; 1410 vi = aj + diag[i] - 1 ; 1411 nz = diag[i] - ai[i]; 1412 s1 = tmp[i]; 1413 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1414 } 1415 1416 /* copy tmp into x according to permutation */ 1417 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1418 1419 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1420 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1421 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1422 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1423 1424 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1430 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1431 { 1432 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1433 IS iscol = a->col,isrow = a->row; 1434 PetscErrorCode ierr; 1435 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1436 PetscInt i,n = A->rmap->n,j; 1437 PetscInt nz; 1438 PetscScalar *x,*tmp,s1; 1439 const MatScalar *aa = a->a,*v; 1440 const PetscScalar *b; 1441 1442 PetscFunctionBegin; 1443 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1444 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1445 tmp = a->solve_work; 1446 1447 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1448 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1449 1450 /* copy the b into temp work space according to permutation */ 1451 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1452 1453 /* forward solve the U^T */ 1454 for (i=0; i<n; i++) { 1455 v = aa + adiag[i+1] + 1; 1456 vi = aj + adiag[i+1] + 1; 1457 nz = adiag[i] - adiag[i+1] - 1; 1458 s1 = tmp[i]; 1459 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1460 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1461 tmp[i] = s1; 1462 } 1463 1464 /* backward solve the L^T */ 1465 for (i=n-1; i>=0; i--){ 1466 v = aa + ai[i]; 1467 vi = aj + ai[i]; 1468 nz = ai[i+1] - ai[i]; 1469 s1 = tmp[i]; 1470 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1471 } 1472 1473 /* copy tmp into x according to permutation */ 1474 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1475 1476 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1477 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1478 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1479 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1480 1481 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } 1484 1485 #undef __FUNCT__ 1486 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace" 1487 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx) 1488 { 1489 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1490 IS iscol = a->col,isrow = a->row; 1491 PetscErrorCode ierr; 1492 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1493 PetscInt i,n = A->rmap->n,j; 1494 PetscInt nz; 1495 PetscScalar *x,*tmp,s1; 1496 const MatScalar *aa = a->a,*v; 1497 const PetscScalar *b; 1498 1499 PetscFunctionBegin; 1500 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1501 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1502 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1503 tmp = a->solve_work; 1504 1505 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1506 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1507 1508 /* copy the b into temp work space according to permutation */ 1509 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1510 1511 /* forward solve the U^T */ 1512 for (i=0; i<n; i++) { 1513 v = aa + diag[i] ; 1514 vi = aj + diag[i] + 1; 1515 nz = ai[i+1] - diag[i] - 1; 1516 s1 = tmp[i]; 1517 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1518 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1519 tmp[i] = s1; 1520 } 1521 1522 /* backward solve the L^T */ 1523 for (i=n-1; i>=0; i--){ 1524 v = aa + diag[i] - 1 ; 1525 vi = aj + diag[i] - 1 ; 1526 nz = diag[i] - ai[i]; 1527 s1 = tmp[i]; 1528 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1529 } 1530 1531 /* copy tmp into x according to permutation */ 1532 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1533 1534 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1535 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1536 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1537 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1538 1539 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1540 PetscFunctionReturn(0); 1541 } 1542 1543 #undef __FUNCT__ 1544 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1545 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1546 { 1547 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1548 IS iscol = a->col,isrow = a->row; 1549 PetscErrorCode ierr; 1550 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1551 PetscInt i,n = A->rmap->n,j; 1552 PetscInt nz; 1553 PetscScalar *x,*tmp,s1; 1554 const MatScalar *aa = a->a,*v; 1555 const PetscScalar *b; 1556 1557 PetscFunctionBegin; 1558 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1559 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1560 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1561 tmp = a->solve_work; 1562 1563 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1564 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1565 1566 /* copy the b into temp work space according to permutation */ 1567 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1568 1569 /* forward solve the U^T */ 1570 for (i=0; i<n; i++) { 1571 v = aa + adiag[i+1] + 1; 1572 vi = aj + adiag[i+1] + 1; 1573 nz = adiag[i] - adiag[i+1] - 1; 1574 s1 = tmp[i]; 1575 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1576 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1577 tmp[i] = s1; 1578 } 1579 1580 1581 /* backward solve the L^T */ 1582 for (i=n-1; i>=0; i--){ 1583 v = aa + ai[i] ; 1584 vi = aj + ai[i]; 1585 nz = ai[i+1] - ai[i]; 1586 s1 = tmp[i]; 1587 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1588 } 1589 1590 /* copy tmp into x according to permutation */ 1591 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1592 1593 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1594 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1595 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1596 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1597 1598 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1599 PetscFunctionReturn(0); 1600 } 1601 1602 /* ----------------------------------------------------------------*/ 1603 1604 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool ); 1605 1606 /* 1607 ilu() under revised new data structure. 1608 Factored arrays bj and ba are stored as 1609 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1610 1611 bi=fact->i is an array of size n+1, in which 1612 bi+ 1613 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1614 bi[n]: points to L(n-1,n-1)+1 1615 1616 bdiag=fact->diag is an array of size n+1,in which 1617 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1618 bdiag[n]: points to entry of U(n-1,0)-1 1619 1620 U(i,:) contains bdiag[i] as its last entry, i.e., 1621 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1622 */ 1623 #undef __FUNCT__ 1624 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0" 1625 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1626 { 1627 1628 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1629 PetscErrorCode ierr; 1630 const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1631 PetscInt i,j,k=0,nz,*bi,*bj,*bdiag; 1632 PetscBool missing; 1633 IS isicol; 1634 1635 PetscFunctionBegin; 1636 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1637 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1638 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1639 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1640 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 1641 b = (Mat_SeqAIJ*)(fact)->data; 1642 1643 /* allocate matrix arrays for new data structure */ 1644 ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);CHKERRQ(ierr); 1645 ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1646 b->singlemalloc = PETSC_TRUE; 1647 if (!b->diag){ 1648 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr); 1649 ierr = PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1650 } 1651 bdiag = b->diag; 1652 1653 if (n > 0) { 1654 ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr); 1655 } 1656 1657 /* set bi and bj with new data structure */ 1658 bi = b->i; 1659 bj = b->j; 1660 1661 /* L part */ 1662 bi[0] = 0; 1663 for (i=0; i<n; i++){ 1664 nz = adiag[i] - ai[i]; 1665 bi[i+1] = bi[i] + nz; 1666 aj = a->j + ai[i]; 1667 for (j=0; j<nz; j++){ 1668 /* *bj = aj[j]; bj++; */ 1669 bj[k++] = aj[j]; 1670 } 1671 } 1672 1673 /* U part */ 1674 bdiag[n] = bi[n]-1; 1675 for (i=n-1; i>=0; i--){ 1676 nz = ai[i+1] - adiag[i] - 1; 1677 aj = a->j + adiag[i] + 1; 1678 for (j=0; j<nz; j++){ 1679 /* *bj = aj[j]; bj++; */ 1680 bj[k++] = aj[j]; 1681 } 1682 /* diag[i] */ 1683 /* *bj = i; bj++; */ 1684 bj[k++] = i; 1685 bdiag[i] = bdiag[i+1] + nz + 1; 1686 } 1687 1688 fact->factortype = MAT_FACTOR_ILU; 1689 fact->info.factor_mallocs = 0; 1690 fact->info.fill_ratio_given = info->fill; 1691 fact->info.fill_ratio_needed = 1.0; 1692 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1693 1694 b = (Mat_SeqAIJ*)(fact)->data; 1695 b->row = isrow; 1696 b->col = iscol; 1697 b->icol = isicol; 1698 ierr = PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1699 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1700 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1701 PetscFunctionReturn(0); 1702 } 1703 1704 #undef __FUNCT__ 1705 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1706 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1707 { 1708 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1709 IS isicol; 1710 PetscErrorCode ierr; 1711 const PetscInt *r,*ic; 1712 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1713 PetscInt *bi,*cols,nnz,*cols_lvl; 1714 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1715 PetscInt i,levels,diagonal_fill; 1716 PetscBool col_identity,row_identity; 1717 PetscReal f; 1718 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1719 PetscBT lnkbt; 1720 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1721 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1722 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1723 1724 PetscFunctionBegin; 1725 /* Uncomment the old data struct part only while testing new data structure for MatSolve() */ 1726 /* 1727 PetscBool olddatastruct=PETSC_FALSE; 1728 ierr = PetscOptionsGetBool(PETSC_NULL,"-ilu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr); 1729 if(olddatastruct){ 1730 ierr = MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1731 PetscFunctionReturn(0); 1732 } 1733 */ 1734 1735 levels = (PetscInt)info->levels; 1736 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1737 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1738 if (!levels && row_identity && col_identity) { 1739 /* special case: ilu(0) with natural ordering */ 1740 ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1741 if (a->inode.size) { 1742 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1743 } 1744 PetscFunctionReturn(0); 1745 } 1746 1747 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1748 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1749 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1750 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1751 1752 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1753 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1754 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1755 bi[0] = bdiag[0] = 0; 1756 ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr); 1757 1758 /* create a linked list for storing column indices of the active row */ 1759 nlnk = n + 1; 1760 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1761 1762 /* initial FreeSpace size is f*(ai[n]+1) */ 1763 f = info->fill; 1764 diagonal_fill = (PetscInt)info->diagonal_fill; 1765 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1766 current_space = free_space; 1767 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1768 current_space_lvl = free_space_lvl; 1769 for (i=0; i<n; i++) { 1770 nzi = 0; 1771 /* copy current row into linked list */ 1772 nnz = ai[r[i]+1] - ai[r[i]]; 1773 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1774 cols = aj + ai[r[i]]; 1775 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1776 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1777 nzi += nlnk; 1778 1779 /* make sure diagonal entry is included */ 1780 if (diagonal_fill && lnk[i] == -1) { 1781 fm = n; 1782 while (lnk[fm] < i) fm = lnk[fm]; 1783 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1784 lnk[fm] = i; 1785 lnk_lvl[i] = 0; 1786 nzi++; dcount++; 1787 } 1788 1789 /* add pivot rows into the active row */ 1790 nzbd = 0; 1791 prow = lnk[n]; 1792 while (prow < i) { 1793 nnz = bdiag[prow]; 1794 cols = bj_ptr[prow] + nnz + 1; 1795 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1796 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1797 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1798 nzi += nlnk; 1799 prow = lnk[prow]; 1800 nzbd++; 1801 } 1802 bdiag[i] = nzbd; 1803 bi[i+1] = bi[i] + nzi; 1804 /* if free space is not available, make more free space */ 1805 if (current_space->local_remaining<nzi) { 1806 nnz = 2*nzi*(n - i); /* estimated and max additional space needed */ 1807 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1808 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1809 reallocs++; 1810 } 1811 1812 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1813 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1814 bj_ptr[i] = current_space->array; 1815 bjlvl_ptr[i] = current_space_lvl->array; 1816 1817 /* make sure the active row i has diagonal entry */ 1818 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1819 1820 current_space->array += nzi; 1821 current_space->local_used += nzi; 1822 current_space->local_remaining -= nzi; 1823 current_space_lvl->array += nzi; 1824 current_space_lvl->local_used += nzi; 1825 current_space_lvl->local_remaining -= nzi; 1826 } 1827 1828 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1829 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1830 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1831 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1832 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 1833 1834 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1835 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1836 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 1837 1838 #if defined(PETSC_USE_INFO) 1839 { 1840 PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1841 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1842 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1843 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1844 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1845 if (diagonal_fill) { 1846 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1847 } 1848 } 1849 #endif 1850 /* put together the new matrix */ 1851 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1852 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 1853 b = (Mat_SeqAIJ*)(fact)->data; 1854 b->free_a = PETSC_TRUE; 1855 b->free_ij = PETSC_TRUE; 1856 b->singlemalloc = PETSC_FALSE; 1857 ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 1858 b->j = bj; 1859 b->i = bi; 1860 b->diag = bdiag; 1861 b->ilen = 0; 1862 b->imax = 0; 1863 b->row = isrow; 1864 b->col = iscol; 1865 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1866 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1867 b->icol = isicol; 1868 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1869 /* In b structure: Free imax, ilen, old a, old j. 1870 Allocate bdiag, solve_work, new a, new j */ 1871 ierr = PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1872 b->maxnz = b->nz = bdiag[0]+1; 1873 (fact)->info.factor_mallocs = reallocs; 1874 (fact)->info.fill_ratio_given = f; 1875 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1876 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1877 if (a->inode.size) { 1878 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1879 } 1880 PetscFunctionReturn(0); 1881 } 1882 1883 #undef __FUNCT__ 1884 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace" 1885 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1886 { 1887 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1888 IS isicol; 1889 PetscErrorCode ierr; 1890 const PetscInt *r,*ic; 1891 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1892 PetscInt *bi,*cols,nnz,*cols_lvl; 1893 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1894 PetscInt i,levels,diagonal_fill; 1895 PetscBool col_identity,row_identity; 1896 PetscReal f; 1897 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1898 PetscBT lnkbt; 1899 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1900 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1901 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1902 PetscBool missing; 1903 1904 PetscFunctionBegin; 1905 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1906 f = info->fill; 1907 levels = (PetscInt)info->levels; 1908 diagonal_fill = (PetscInt)info->diagonal_fill; 1909 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1910 1911 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1912 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1913 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1914 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 1915 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1916 if (a->inode.size) { 1917 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1918 } 1919 fact->factortype = MAT_FACTOR_ILU; 1920 (fact)->info.factor_mallocs = 0; 1921 (fact)->info.fill_ratio_given = info->fill; 1922 (fact)->info.fill_ratio_needed = 1.0; 1923 b = (Mat_SeqAIJ*)(fact)->data; 1924 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1925 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1926 b->row = isrow; 1927 b->col = iscol; 1928 b->icol = isicol; 1929 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1930 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1931 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1932 PetscFunctionReturn(0); 1933 } 1934 1935 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1936 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1937 1938 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1939 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1940 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1941 bi[0] = bdiag[0] = 0; 1942 1943 ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr); 1944 1945 /* create a linked list for storing column indices of the active row */ 1946 nlnk = n + 1; 1947 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1948 1949 /* initial FreeSpace size is f*(ai[n]+1) */ 1950 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1951 current_space = free_space; 1952 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1953 current_space_lvl = free_space_lvl; 1954 1955 for (i=0; i<n; i++) { 1956 nzi = 0; 1957 /* copy current row into linked list */ 1958 nnz = ai[r[i]+1] - ai[r[i]]; 1959 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1960 cols = aj + ai[r[i]]; 1961 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1962 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1963 nzi += nlnk; 1964 1965 /* make sure diagonal entry is included */ 1966 if (diagonal_fill && lnk[i] == -1) { 1967 fm = n; 1968 while (lnk[fm] < i) fm = lnk[fm]; 1969 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1970 lnk[fm] = i; 1971 lnk_lvl[i] = 0; 1972 nzi++; dcount++; 1973 } 1974 1975 /* add pivot rows into the active row */ 1976 nzbd = 0; 1977 prow = lnk[n]; 1978 while (prow < i) { 1979 nnz = bdiag[prow]; 1980 cols = bj_ptr[prow] + nnz + 1; 1981 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1982 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1983 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1984 nzi += nlnk; 1985 prow = lnk[prow]; 1986 nzbd++; 1987 } 1988 bdiag[i] = nzbd; 1989 bi[i+1] = bi[i] + nzi; 1990 1991 /* if free space is not available, make more free space */ 1992 if (current_space->local_remaining<nzi) { 1993 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1994 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1995 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1996 reallocs++; 1997 } 1998 1999 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2000 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2001 bj_ptr[i] = current_space->array; 2002 bjlvl_ptr[i] = current_space_lvl->array; 2003 2004 /* make sure the active row i has diagonal entry */ 2005 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 2006 2007 current_space->array += nzi; 2008 current_space->local_used += nzi; 2009 current_space->local_remaining -= nzi; 2010 current_space_lvl->array += nzi; 2011 current_space_lvl->local_used += nzi; 2012 current_space_lvl->local_remaining -= nzi; 2013 } 2014 2015 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2016 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2017 2018 /* destroy list of free space and other temporary arrays */ 2019 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 2020 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */ 2021 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2022 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2023 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 2024 2025 #if defined(PETSC_USE_INFO) 2026 { 2027 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2028 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 2029 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2030 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 2031 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2032 if (diagonal_fill) { 2033 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 2034 } 2035 } 2036 #endif 2037 2038 /* put together the new matrix */ 2039 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 2040 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 2041 b = (Mat_SeqAIJ*)(fact)->data; 2042 b->free_a = PETSC_TRUE; 2043 b->free_ij = PETSC_TRUE; 2044 b->singlemalloc = PETSC_FALSE; 2045 ierr = PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 2046 b->j = bj; 2047 b->i = bi; 2048 for (i=0; i<n; i++) bdiag[i] += bi[i]; 2049 b->diag = bdiag; 2050 b->ilen = 0; 2051 b->imax = 0; 2052 b->row = isrow; 2053 b->col = iscol; 2054 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2055 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2056 b->icol = isicol; 2057 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2058 /* In b structure: Free imax, ilen, old a, old j. 2059 Allocate bdiag, solve_work, new a, new j */ 2060 ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 2061 b->maxnz = b->nz = bi[n] ; 2062 (fact)->info.factor_mallocs = reallocs; 2063 (fact)->info.fill_ratio_given = f; 2064 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2065 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2066 if (a->inode.size) { 2067 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2068 } 2069 PetscFunctionReturn(0); 2070 } 2071 2072 #undef __FUNCT__ 2073 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 2074 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 2075 { 2076 Mat C = B; 2077 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2078 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2079 IS ip=b->row,iip = b->icol; 2080 PetscErrorCode ierr; 2081 const PetscInt *rip,*riip; 2082 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 2083 PetscInt *ai=a->i,*aj=a->j; 2084 PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 2085 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2086 PetscBool perm_identity; 2087 FactorShiftCtx sctx; 2088 PetscReal rs; 2089 MatScalar d,*v; 2090 2091 PetscFunctionBegin; 2092 /* MatPivotSetUp(): initialize shift context sctx */ 2093 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2094 2095 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2096 sctx.shift_top = info->zeropivot; 2097 for (i=0; i<mbs; i++) { 2098 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2099 d = (aa)[a->diag[i]]; 2100 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2101 v = aa+ai[i]; 2102 nz = ai[i+1] - ai[i]; 2103 for (j=0; j<nz; j++) 2104 rs += PetscAbsScalar(v[j]); 2105 if (rs>sctx.shift_top) sctx.shift_top = rs; 2106 } 2107 sctx.shift_top *= 1.1; 2108 sctx.nshift_max = 5; 2109 sctx.shift_lo = 0.; 2110 sctx.shift_hi = 1.; 2111 } 2112 2113 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2114 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2115 2116 /* allocate working arrays 2117 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2118 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2119 */ 2120 ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);CHKERRQ(ierr); 2121 2122 do { 2123 sctx.newshift = PETSC_FALSE; 2124 2125 for (i=0; i<mbs; i++) c2r[i] = mbs; 2126 il[0] = 0; 2127 2128 for (k = 0; k<mbs; k++){ 2129 /* zero rtmp */ 2130 nz = bi[k+1] - bi[k]; 2131 bjtmp = bj + bi[k]; 2132 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2133 2134 /* load in initial unfactored row */ 2135 bval = ba + bi[k]; 2136 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2137 for (j = jmin; j < jmax; j++){ 2138 col = riip[aj[j]]; 2139 if (col >= k){ /* only take upper triangular entry */ 2140 rtmp[col] = aa[j]; 2141 *bval++ = 0.0; /* for in-place factorization */ 2142 } 2143 } 2144 /* shift the diagonal of the matrix: ZeropivotApply() */ 2145 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2146 2147 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2148 dk = rtmp[k]; 2149 i = c2r[k]; /* first row to be added to k_th row */ 2150 2151 while (i < k){ 2152 nexti = c2r[i]; /* next row to be added to k_th row */ 2153 2154 /* compute multiplier, update diag(k) and U(i,k) */ 2155 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2156 uikdi = - ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 2157 dk += uikdi*ba[ili]; /* update diag[k] */ 2158 ba[ili] = uikdi; /* -U(i,k) */ 2159 2160 /* add multiple of row i to k-th row */ 2161 jmin = ili + 1; jmax = bi[i+1]; 2162 if (jmin < jmax){ 2163 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2164 /* update il and c2r for row i */ 2165 il[i] = jmin; 2166 j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 2167 } 2168 i = nexti; 2169 } 2170 2171 /* copy data into U(k,:) */ 2172 rs = 0.0; 2173 jmin = bi[k]; jmax = bi[k+1]-1; 2174 if (jmin < jmax) { 2175 for (j=jmin; j<jmax; j++){ 2176 col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]); 2177 } 2178 /* add the k-th row into il and c2r */ 2179 il[k] = jmin; 2180 i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 2181 } 2182 2183 /* MatPivotCheck() */ 2184 sctx.rs = rs; 2185 sctx.pv = dk; 2186 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 2187 if(sctx.newshift) break; 2188 dk = sctx.pv; 2189 2190 ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 2191 } 2192 } while (sctx.newshift); 2193 2194 ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr); 2195 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2196 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2197 2198 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2199 if (perm_identity){ 2200 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2201 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2202 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2203 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2204 } else { 2205 B->ops->solve = MatSolve_SeqSBAIJ_1; 2206 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2207 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2208 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2209 } 2210 2211 C->assembled = PETSC_TRUE; 2212 C->preallocated = PETSC_TRUE; 2213 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2214 2215 /* MatPivotView() */ 2216 if (sctx.nshift){ 2217 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2218 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 2219 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2220 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2221 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){ 2222 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr); 2223 } 2224 } 2225 PetscFunctionReturn(0); 2226 } 2227 2228 #undef __FUNCT__ 2229 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace" 2230 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 2231 { 2232 Mat C = B; 2233 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2234 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2235 IS ip=b->row,iip = b->icol; 2236 PetscErrorCode ierr; 2237 const PetscInt *rip,*riip; 2238 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp; 2239 PetscInt *ai=a->i,*aj=a->j; 2240 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 2241 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2242 PetscBool perm_identity; 2243 FactorShiftCtx sctx; 2244 PetscReal rs; 2245 MatScalar d,*v; 2246 2247 PetscFunctionBegin; 2248 /* MatPivotSetUp(): initialize shift context sctx */ 2249 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2250 2251 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2252 sctx.shift_top = info->zeropivot; 2253 for (i=0; i<mbs; i++) { 2254 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2255 d = (aa)[a->diag[i]]; 2256 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2257 v = aa+ai[i]; 2258 nz = ai[i+1] - ai[i]; 2259 for (j=0; j<nz; j++) 2260 rs += PetscAbsScalar(v[j]); 2261 if (rs>sctx.shift_top) sctx.shift_top = rs; 2262 } 2263 sctx.shift_top *= 1.1; 2264 sctx.nshift_max = 5; 2265 sctx.shift_lo = 0.; 2266 sctx.shift_hi = 1.; 2267 } 2268 2269 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2270 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2271 2272 /* initialization */ 2273 ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); 2274 2275 do { 2276 sctx.newshift = PETSC_FALSE; 2277 2278 for (i=0; i<mbs; i++) jl[i] = mbs; 2279 il[0] = 0; 2280 2281 for (k = 0; k<mbs; k++){ 2282 /* zero rtmp */ 2283 nz = bi[k+1] - bi[k]; 2284 bjtmp = bj + bi[k]; 2285 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2286 2287 bval = ba + bi[k]; 2288 /* initialize k-th row by the perm[k]-th row of A */ 2289 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2290 for (j = jmin; j < jmax; j++){ 2291 col = riip[aj[j]]; 2292 if (col >= k){ /* only take upper triangular entry */ 2293 rtmp[col] = aa[j]; 2294 *bval++ = 0.0; /* for in-place factorization */ 2295 } 2296 } 2297 /* shift the diagonal of the matrix */ 2298 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2299 2300 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2301 dk = rtmp[k]; 2302 i = jl[k]; /* first row to be added to k_th row */ 2303 2304 while (i < k){ 2305 nexti = jl[i]; /* next row to be added to k_th row */ 2306 2307 /* compute multiplier, update diag(k) and U(i,k) */ 2308 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2309 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 2310 dk += uikdi*ba[ili]; 2311 ba[ili] = uikdi; /* -U(i,k) */ 2312 2313 /* add multiple of row i to k-th row */ 2314 jmin = ili + 1; jmax = bi[i+1]; 2315 if (jmin < jmax){ 2316 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2317 /* update il and jl for row i */ 2318 il[i] = jmin; 2319 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 2320 } 2321 i = nexti; 2322 } 2323 2324 /* shift the diagonals when zero pivot is detected */ 2325 /* compute rs=sum of abs(off-diagonal) */ 2326 rs = 0.0; 2327 jmin = bi[k]+1; 2328 nz = bi[k+1] - jmin; 2329 bcol = bj + jmin; 2330 for (j=0; j<nz; j++) { 2331 rs += PetscAbsScalar(rtmp[bcol[j]]); 2332 } 2333 2334 sctx.rs = rs; 2335 sctx.pv = dk; 2336 ierr = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr); 2337 if (sctx.newshift) break; 2338 dk = sctx.pv; 2339 2340 /* copy data into U(k,:) */ 2341 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 2342 jmin = bi[k]+1; jmax = bi[k+1]; 2343 if (jmin < jmax) { 2344 for (j=jmin; j<jmax; j++){ 2345 col = bj[j]; ba[j] = rtmp[col]; 2346 } 2347 /* add the k-th row into il and jl */ 2348 il[k] = jmin; 2349 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 2350 } 2351 } 2352 } while (sctx.newshift); 2353 2354 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 2355 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2356 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2357 2358 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2359 if (perm_identity){ 2360 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2361 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2362 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2363 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2364 } else { 2365 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2366 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2367 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2368 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2369 } 2370 2371 C->assembled = PETSC_TRUE; 2372 C->preallocated = PETSC_TRUE; 2373 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2374 if (sctx.nshift){ 2375 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2376 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2377 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2378 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2379 } 2380 } 2381 PetscFunctionReturn(0); 2382 } 2383 2384 /* 2385 icc() under revised new data structure. 2386 Factored arrays bj and ba are stored as 2387 U(0,:),...,U(i,:),U(n-1,:) 2388 2389 ui=fact->i is an array of size n+1, in which 2390 ui+ 2391 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2392 ui[n]: points to U(n-1,n-1)+1 2393 2394 udiag=fact->diag is an array of size n,in which 2395 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2396 2397 U(i,:) contains udiag[i] as its last entry, i.e., 2398 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2399 */ 2400 2401 #undef __FUNCT__ 2402 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 2403 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2404 { 2405 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2406 Mat_SeqSBAIJ *b; 2407 PetscErrorCode ierr; 2408 PetscBool perm_identity,missing; 2409 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2410 const PetscInt *rip,*riip; 2411 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2412 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 2413 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2414 PetscReal fill=info->fill,levels=info->levels; 2415 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2416 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2417 PetscBT lnkbt; 2418 IS iperm; 2419 2420 PetscFunctionBegin; 2421 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2422 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2423 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2424 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2425 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2426 2427 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2428 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2429 ui[0] = 0; 2430 2431 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2432 if (!levels && perm_identity) { 2433 for (i=0; i<am; i++) { 2434 ncols = ai[i+1] - a->diag[i]; 2435 ui[i+1] = ui[i] + ncols; 2436 udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 2437 } 2438 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2439 cols = uj; 2440 for (i=0; i<am; i++) { 2441 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2442 ncols = ai[i+1] - a->diag[i] -1; 2443 for (j=0; j<ncols; j++) *cols++ = aj[j]; 2444 *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */ 2445 } 2446 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2447 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2448 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2449 2450 /* initialization */ 2451 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 2452 2453 /* jl: linked list for storing indices of the pivot rows 2454 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2455 ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr); 2456 for (i=0; i<am; i++){ 2457 jl[i] = am; il[i] = 0; 2458 } 2459 2460 /* create and initialize a linked list for storing column indices of the active row k */ 2461 nlnk = am + 1; 2462 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2463 2464 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2465 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2466 current_space = free_space; 2467 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr); 2468 current_space_lvl = free_space_lvl; 2469 2470 for (k=0; k<am; k++){ /* for each active row k */ 2471 /* initialize lnk by the column indices of row rip[k] of A */ 2472 nzk = 0; 2473 ncols = ai[rip[k]+1] - ai[rip[k]]; 2474 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2475 ncols_upper = 0; 2476 for (j=0; j<ncols; j++){ 2477 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2478 if (riip[i] >= k){ /* only take upper triangular entry */ 2479 ajtmp[ncols_upper] = i; 2480 ncols_upper++; 2481 } 2482 } 2483 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2484 nzk += nlnk; 2485 2486 /* update lnk by computing fill-in for each pivot row to be merged in */ 2487 prow = jl[k]; /* 1st pivot row */ 2488 2489 while (prow < k){ 2490 nextprow = jl[prow]; 2491 2492 /* merge prow into k-th row */ 2493 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2494 jmax = ui[prow+1]; 2495 ncols = jmax-jmin; 2496 i = jmin - ui[prow]; 2497 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2498 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2499 j = *(uj - 1); 2500 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2501 nzk += nlnk; 2502 2503 /* update il and jl for prow */ 2504 if (jmin < jmax){ 2505 il[prow] = jmin; 2506 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2507 } 2508 prow = nextprow; 2509 } 2510 2511 /* if free space is not available, make more free space */ 2512 if (current_space->local_remaining<nzk) { 2513 i = am - k + 1; /* num of unfactored rows */ 2514 i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2515 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2516 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2517 reallocs++; 2518 } 2519 2520 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2521 if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2522 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2523 2524 /* add the k-th row into il and jl */ 2525 if (nzk > 1){ 2526 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2527 jl[k] = jl[i]; jl[i] = k; 2528 il[k] = ui[k] + 1; 2529 } 2530 uj_ptr[k] = current_space->array; 2531 uj_lvl_ptr[k] = current_space_lvl->array; 2532 2533 current_space->array += nzk; 2534 current_space->local_used += nzk; 2535 current_space->local_remaining -= nzk; 2536 2537 current_space_lvl->array += nzk; 2538 current_space_lvl->local_used += nzk; 2539 current_space_lvl->local_remaining -= nzk; 2540 2541 ui[k+1] = ui[k] + nzk; 2542 } 2543 2544 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2545 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2546 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2547 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2548 2549 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2550 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2551 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2552 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2553 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2554 2555 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2556 2557 /* put together the new matrix in MATSEQSBAIJ format */ 2558 b = (Mat_SeqSBAIJ*)(fact)->data; 2559 b->singlemalloc = PETSC_FALSE; 2560 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2561 b->j = uj; 2562 b->i = ui; 2563 b->diag = udiag; 2564 b->free_diag = PETSC_TRUE; 2565 b->ilen = 0; 2566 b->imax = 0; 2567 b->row = perm; 2568 b->col = perm; 2569 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2570 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2571 b->icol = iperm; 2572 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2573 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2574 ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2575 b->maxnz = b->nz = ui[am]; 2576 b->free_a = PETSC_TRUE; 2577 b->free_ij = PETSC_TRUE; 2578 2579 fact->info.factor_mallocs = reallocs; 2580 fact->info.fill_ratio_given = fill; 2581 if (ai[am] != 0) { 2582 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2583 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2584 } else { 2585 fact->info.fill_ratio_needed = 0.0; 2586 } 2587 #if defined(PETSC_USE_INFO) 2588 if (ai[am] != 0) { 2589 PetscReal af = fact->info.fill_ratio_needed; 2590 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2591 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2592 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2593 } else { 2594 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2595 } 2596 #endif 2597 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2598 PetscFunctionReturn(0); 2599 } 2600 2601 #undef __FUNCT__ 2602 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace" 2603 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2604 { 2605 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2606 Mat_SeqSBAIJ *b; 2607 PetscErrorCode ierr; 2608 PetscBool perm_identity,missing; 2609 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2610 const PetscInt *rip,*riip; 2611 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2612 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 2613 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2614 PetscReal fill=info->fill,levels=info->levels; 2615 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2616 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2617 PetscBT lnkbt; 2618 IS iperm; 2619 2620 PetscFunctionBegin; 2621 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2622 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2623 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2624 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2625 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2626 2627 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2628 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2629 ui[0] = 0; 2630 2631 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2632 if (!levels && perm_identity) { 2633 2634 for (i=0; i<am; i++) { 2635 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 2636 udiag[i] = ui[i]; 2637 } 2638 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2639 cols = uj; 2640 for (i=0; i<am; i++) { 2641 aj = a->j + a->diag[i]; 2642 ncols = ui[i+1] - ui[i]; 2643 for (j=0; j<ncols; j++) *cols++ = *aj++; 2644 } 2645 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2646 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2647 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2648 2649 /* initialization */ 2650 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 2651 2652 /* jl: linked list for storing indices of the pivot rows 2653 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2654 ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr); 2655 for (i=0; i<am; i++){ 2656 jl[i] = am; il[i] = 0; 2657 } 2658 2659 /* create and initialize a linked list for storing column indices of the active row k */ 2660 nlnk = am + 1; 2661 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2662 2663 /* initial FreeSpace size is fill*(ai[am]+1) */ 2664 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2665 current_space = free_space; 2666 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2667 current_space_lvl = free_space_lvl; 2668 2669 for (k=0; k<am; k++){ /* for each active row k */ 2670 /* initialize lnk by the column indices of row rip[k] of A */ 2671 nzk = 0; 2672 ncols = ai[rip[k]+1] - ai[rip[k]]; 2673 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2674 ncols_upper = 0; 2675 for (j=0; j<ncols; j++){ 2676 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2677 if (riip[i] >= k){ /* only take upper triangular entry */ 2678 ajtmp[ncols_upper] = i; 2679 ncols_upper++; 2680 } 2681 } 2682 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2683 nzk += nlnk; 2684 2685 /* update lnk by computing fill-in for each pivot row to be merged in */ 2686 prow = jl[k]; /* 1st pivot row */ 2687 2688 while (prow < k){ 2689 nextprow = jl[prow]; 2690 2691 /* merge prow into k-th row */ 2692 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2693 jmax = ui[prow+1]; 2694 ncols = jmax-jmin; 2695 i = jmin - ui[prow]; 2696 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2697 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2698 j = *(uj - 1); 2699 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2700 nzk += nlnk; 2701 2702 /* update il and jl for prow */ 2703 if (jmin < jmax){ 2704 il[prow] = jmin; 2705 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2706 } 2707 prow = nextprow; 2708 } 2709 2710 /* if free space is not available, make more free space */ 2711 if (current_space->local_remaining<nzk) { 2712 i = am - k + 1; /* num of unfactored rows */ 2713 i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2714 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2715 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2716 reallocs++; 2717 } 2718 2719 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2720 if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2721 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2722 2723 /* add the k-th row into il and jl */ 2724 if (nzk > 1){ 2725 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2726 jl[k] = jl[i]; jl[i] = k; 2727 il[k] = ui[k] + 1; 2728 } 2729 uj_ptr[k] = current_space->array; 2730 uj_lvl_ptr[k] = current_space_lvl->array; 2731 2732 current_space->array += nzk; 2733 current_space->local_used += nzk; 2734 current_space->local_remaining -= nzk; 2735 2736 current_space_lvl->array += nzk; 2737 current_space_lvl->local_used += nzk; 2738 current_space_lvl->local_remaining -= nzk; 2739 2740 ui[k+1] = ui[k] + nzk; 2741 } 2742 2743 #if defined(PETSC_USE_INFO) 2744 if (ai[am] != 0) { 2745 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2746 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2747 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2748 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2749 } else { 2750 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2751 } 2752 #endif 2753 2754 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2755 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2756 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2757 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2758 2759 /* destroy list of free space and other temporary array(s) */ 2760 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2761 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2762 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2763 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2764 2765 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2766 2767 /* put together the new matrix in MATSEQSBAIJ format */ 2768 2769 b = (Mat_SeqSBAIJ*)fact->data; 2770 b->singlemalloc = PETSC_FALSE; 2771 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2772 b->j = uj; 2773 b->i = ui; 2774 b->diag = udiag; 2775 b->free_diag = PETSC_TRUE; 2776 b->ilen = 0; 2777 b->imax = 0; 2778 b->row = perm; 2779 b->col = perm; 2780 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2781 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2782 b->icol = iperm; 2783 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2784 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2785 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2786 b->maxnz = b->nz = ui[am]; 2787 b->free_a = PETSC_TRUE; 2788 b->free_ij = PETSC_TRUE; 2789 2790 fact->info.factor_mallocs = reallocs; 2791 fact->info.fill_ratio_given = fill; 2792 if (ai[am] != 0) { 2793 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2794 } else { 2795 fact->info.fill_ratio_needed = 0.0; 2796 } 2797 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2798 PetscFunctionReturn(0); 2799 } 2800 2801 #undef __FUNCT__ 2802 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 2803 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2804 { 2805 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2806 Mat_SeqSBAIJ *b; 2807 PetscErrorCode ierr; 2808 PetscBool perm_identity; 2809 PetscReal fill = info->fill; 2810 const PetscInt *rip,*riip; 2811 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2812 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2813 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag; 2814 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2815 PetscBT lnkbt; 2816 IS iperm; 2817 2818 PetscFunctionBegin; 2819 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2820 /* check whether perm is the identity mapping */ 2821 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2822 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2823 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2824 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2825 2826 /* initialization */ 2827 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2828 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2829 ui[0] = 0; 2830 2831 /* jl: linked list for storing indices of the pivot rows 2832 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2833 ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr); 2834 for (i=0; i<am; i++){ 2835 jl[i] = am; il[i] = 0; 2836 } 2837 2838 /* create and initialize a linked list for storing column indices of the active row k */ 2839 nlnk = am + 1; 2840 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2841 2842 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2843 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2844 current_space = free_space; 2845 2846 for (k=0; k<am; k++){ /* for each active row k */ 2847 /* initialize lnk by the column indices of row rip[k] of A */ 2848 nzk = 0; 2849 ncols = ai[rip[k]+1] - ai[rip[k]]; 2850 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2851 ncols_upper = 0; 2852 for (j=0; j<ncols; j++){ 2853 i = riip[*(aj + ai[rip[k]] + j)]; 2854 if (i >= k){ /* only take upper triangular entry */ 2855 cols[ncols_upper] = i; 2856 ncols_upper++; 2857 } 2858 } 2859 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2860 nzk += nlnk; 2861 2862 /* update lnk by computing fill-in for each pivot row to be merged in */ 2863 prow = jl[k]; /* 1st pivot row */ 2864 2865 while (prow < k){ 2866 nextprow = jl[prow]; 2867 /* merge prow into k-th row */ 2868 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2869 jmax = ui[prow+1]; 2870 ncols = jmax-jmin; 2871 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2872 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2873 nzk += nlnk; 2874 2875 /* update il and jl for prow */ 2876 if (jmin < jmax){ 2877 il[prow] = jmin; 2878 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 2879 } 2880 prow = nextprow; 2881 } 2882 2883 /* if free space is not available, make more free space */ 2884 if (current_space->local_remaining<nzk) { 2885 i = am - k + 1; /* num of unfactored rows */ 2886 i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2887 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2888 reallocs++; 2889 } 2890 2891 /* copy data into free space, then initialize lnk */ 2892 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 2893 2894 /* add the k-th row into il and jl */ 2895 if (nzk > 1){ 2896 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2897 jl[k] = jl[i]; jl[i] = k; 2898 il[k] = ui[k] + 1; 2899 } 2900 ui_ptr[k] = current_space->array; 2901 current_space->array += nzk; 2902 current_space->local_used += nzk; 2903 current_space->local_remaining -= nzk; 2904 2905 ui[k+1] = ui[k] + nzk; 2906 } 2907 2908 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2909 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2910 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 2911 2912 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2913 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2914 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2915 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2916 2917 /* put together the new matrix in MATSEQSBAIJ format */ 2918 2919 b = (Mat_SeqSBAIJ*)fact->data; 2920 b->singlemalloc = PETSC_FALSE; 2921 b->free_a = PETSC_TRUE; 2922 b->free_ij = PETSC_TRUE; 2923 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2924 b->j = uj; 2925 b->i = ui; 2926 b->diag = udiag; 2927 b->free_diag = PETSC_TRUE; 2928 b->ilen = 0; 2929 b->imax = 0; 2930 b->row = perm; 2931 b->col = perm; 2932 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2933 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2934 b->icol = iperm; 2935 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2936 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2937 ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2938 b->maxnz = b->nz = ui[am]; 2939 2940 fact->info.factor_mallocs = reallocs; 2941 fact->info.fill_ratio_given = fill; 2942 if (ai[am] != 0) { 2943 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2944 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2945 } else { 2946 fact->info.fill_ratio_needed = 0.0; 2947 } 2948 #if defined(PETSC_USE_INFO) 2949 if (ai[am] != 0) { 2950 PetscReal af = fact->info.fill_ratio_needed; 2951 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2952 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2953 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2954 } else { 2955 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2956 } 2957 #endif 2958 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2959 PetscFunctionReturn(0); 2960 } 2961 2962 #undef __FUNCT__ 2963 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace" 2964 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2965 { 2966 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2967 Mat_SeqSBAIJ *b; 2968 PetscErrorCode ierr; 2969 PetscBool perm_identity; 2970 PetscReal fill = info->fill; 2971 const PetscInt *rip,*riip; 2972 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2973 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2974 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 2975 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2976 PetscBT lnkbt; 2977 IS iperm; 2978 2979 PetscFunctionBegin; 2980 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2981 /* check whether perm is the identity mapping */ 2982 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2983 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2984 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2985 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2986 2987 /* initialization */ 2988 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2989 ui[0] = 0; 2990 2991 /* jl: linked list for storing indices of the pivot rows 2992 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2993 ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr); 2994 for (i=0; i<am; i++){ 2995 jl[i] = am; il[i] = 0; 2996 } 2997 2998 /* create and initialize a linked list for storing column indices of the active row k */ 2999 nlnk = am + 1; 3000 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3001 3002 /* initial FreeSpace size is fill*(ai[am]+1) */ 3003 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 3004 current_space = free_space; 3005 3006 for (k=0; k<am; k++){ /* for each active row k */ 3007 /* initialize lnk by the column indices of row rip[k] of A */ 3008 nzk = 0; 3009 ncols = ai[rip[k]+1] - ai[rip[k]]; 3010 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 3011 ncols_upper = 0; 3012 for (j=0; j<ncols; j++){ 3013 i = riip[*(aj + ai[rip[k]] + j)]; 3014 if (i >= k){ /* only take upper triangular entry */ 3015 cols[ncols_upper] = i; 3016 ncols_upper++; 3017 } 3018 } 3019 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3020 nzk += nlnk; 3021 3022 /* update lnk by computing fill-in for each pivot row to be merged in */ 3023 prow = jl[k]; /* 1st pivot row */ 3024 3025 while (prow < k){ 3026 nextprow = jl[prow]; 3027 /* merge prow into k-th row */ 3028 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3029 jmax = ui[prow+1]; 3030 ncols = jmax-jmin; 3031 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3032 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3033 nzk += nlnk; 3034 3035 /* update il and jl for prow */ 3036 if (jmin < jmax){ 3037 il[prow] = jmin; 3038 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 3039 } 3040 prow = nextprow; 3041 } 3042 3043 /* if free space is not available, make more free space */ 3044 if (current_space->local_remaining<nzk) { 3045 i = am - k + 1; /* num of unfactored rows */ 3046 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3047 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 3048 reallocs++; 3049 } 3050 3051 /* copy data into free space, then initialize lnk */ 3052 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3053 3054 /* add the k-th row into il and jl */ 3055 if (nzk-1 > 0){ 3056 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3057 jl[k] = jl[i]; jl[i] = k; 3058 il[k] = ui[k] + 1; 3059 } 3060 ui_ptr[k] = current_space->array; 3061 current_space->array += nzk; 3062 current_space->local_used += nzk; 3063 current_space->local_remaining -= nzk; 3064 3065 ui[k+1] = ui[k] + nzk; 3066 } 3067 3068 #if defined(PETSC_USE_INFO) 3069 if (ai[am] != 0) { 3070 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 3071 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 3072 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 3073 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 3074 } else { 3075 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 3076 } 3077 #endif 3078 3079 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 3080 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 3081 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 3082 3083 /* destroy list of free space and other temporary array(s) */ 3084 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 3085 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 3086 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3087 3088 /* put together the new matrix in MATSEQSBAIJ format */ 3089 3090 b = (Mat_SeqSBAIJ*)fact->data; 3091 b->singlemalloc = PETSC_FALSE; 3092 b->free_a = PETSC_TRUE; 3093 b->free_ij = PETSC_TRUE; 3094 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 3095 b->j = uj; 3096 b->i = ui; 3097 b->diag = 0; 3098 b->ilen = 0; 3099 b->imax = 0; 3100 b->row = perm; 3101 b->col = perm; 3102 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3103 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3104 b->icol = iperm; 3105 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3106 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3107 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3108 b->maxnz = b->nz = ui[am]; 3109 3110 fact->info.factor_mallocs = reallocs; 3111 fact->info.fill_ratio_given = fill; 3112 if (ai[am] != 0) { 3113 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 3114 } else { 3115 fact->info.fill_ratio_needed = 0.0; 3116 } 3117 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3118 PetscFunctionReturn(0); 3119 } 3120 3121 #undef __FUNCT__ 3122 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 3123 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 3124 { 3125 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3126 PetscErrorCode ierr; 3127 PetscInt n = A->rmap->n; 3128 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 3129 PetscScalar *x,sum; 3130 const PetscScalar *b; 3131 const MatScalar *aa = a->a,*v; 3132 PetscInt i,nz; 3133 3134 PetscFunctionBegin; 3135 if (!n) PetscFunctionReturn(0); 3136 3137 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3138 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3139 3140 /* forward solve the lower triangular */ 3141 x[0] = b[0]; 3142 v = aa; 3143 vi = aj; 3144 for (i=1; i<n; i++) { 3145 nz = ai[i+1] - ai[i]; 3146 sum = b[i]; 3147 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3148 v += nz; 3149 vi += nz; 3150 x[i] = sum; 3151 } 3152 3153 /* backward solve the upper triangular */ 3154 for (i=n-1; i>=0; i--){ 3155 v = aa + adiag[i+1] + 1; 3156 vi = aj + adiag[i+1] + 1; 3157 nz = adiag[i] - adiag[i+1]-1; 3158 sum = x[i]; 3159 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3160 x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3161 } 3162 3163 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 3164 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3165 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3166 PetscFunctionReturn(0); 3167 } 3168 3169 #undef __FUNCT__ 3170 #define __FUNCT__ "MatSolve_SeqAIJ" 3171 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 3172 { 3173 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3174 IS iscol = a->col,isrow = a->row; 3175 PetscErrorCode ierr; 3176 PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz; 3177 const PetscInt *rout,*cout,*r,*c; 3178 PetscScalar *x,*tmp,sum; 3179 const PetscScalar *b; 3180 const MatScalar *aa = a->a,*v; 3181 3182 PetscFunctionBegin; 3183 if (!n) PetscFunctionReturn(0); 3184 3185 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3186 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3187 tmp = a->solve_work; 3188 3189 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 3190 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 3191 3192 /* forward solve the lower triangular */ 3193 tmp[0] = b[r[0]]; 3194 v = aa; 3195 vi = aj; 3196 for (i=1; i<n; i++) { 3197 nz = ai[i+1] - ai[i]; 3198 sum = b[r[i]]; 3199 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3200 tmp[i] = sum; 3201 v += nz; vi += nz; 3202 } 3203 3204 /* backward solve the upper triangular */ 3205 for (i=n-1; i>=0; i--){ 3206 v = aa + adiag[i+1]+1; 3207 vi = aj + adiag[i+1]+1; 3208 nz = adiag[i]-adiag[i+1]-1; 3209 sum = tmp[i]; 3210 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3211 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 3212 } 3213 3214 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 3215 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 3216 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3217 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3218 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 3219 PetscFunctionReturn(0); 3220 } 3221 3222 #undef __FUNCT__ 3223 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 3224 /* 3225 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3226 */ 3227 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 3228 { 3229 Mat B = *fact; 3230 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 3231 IS isicol; 3232 PetscErrorCode ierr; 3233 const PetscInt *r,*ic; 3234 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 3235 PetscInt *bi,*bj,*bdiag,*bdiag_rev; 3236 PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au; 3237 PetscInt nlnk,*lnk; 3238 PetscBT lnkbt; 3239 PetscBool row_identity,icol_identity; 3240 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp; 3241 const PetscInt *ics; 3242 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 3243 PetscReal dt=info->dt,dtcol=info->dtcol,shift=info->shiftamount; 3244 PetscInt dtcount=(PetscInt)info->dtcount,nnz_max; 3245 PetscBool missing; 3246 3247 PetscFunctionBegin; 3248 3249 if (dt == PETSC_DEFAULT) dt = 0.005; 3250 if (dtcol == PETSC_DEFAULT) dtcol = 0.01; /* XXX unused! */ 3251 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); 3252 3253 /* ------- symbolic factorization, can be reused ---------*/ 3254 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 3255 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 3256 adiag=a->diag; 3257 3258 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 3259 3260 /* bdiag is location of diagonal in factor */ 3261 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); /* becomes b->diag */ 3262 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev);CHKERRQ(ierr); /* temporary */ 3263 3264 /* allocate row pointers bi */ 3265 ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3266 3267 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3268 if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */ 3269 nnz_max = ai[n]+2*n*dtcount+2; 3270 3271 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3272 ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr); 3273 3274 /* put together the new matrix */ 3275 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 3276 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 3277 b = (Mat_SeqAIJ*)B->data; 3278 b->free_a = PETSC_TRUE; 3279 b->free_ij = PETSC_TRUE; 3280 b->singlemalloc = PETSC_FALSE; 3281 b->a = ba; 3282 b->j = bj; 3283 b->i = bi; 3284 b->diag = bdiag; 3285 b->ilen = 0; 3286 b->imax = 0; 3287 b->row = isrow; 3288 b->col = iscol; 3289 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3290 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3291 b->icol = isicol; 3292 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3293 3294 ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3295 b->maxnz = nnz_max; 3296 3297 B->factortype = MAT_FACTOR_ILUDT; 3298 B->info.factor_mallocs = 0; 3299 B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 3300 CHKMEMQ; 3301 /* ------- end of symbolic factorization ---------*/ 3302 3303 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3304 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3305 ics = ic; 3306 3307 /* linked list for storing column indices of the active row */ 3308 nlnk = n + 1; 3309 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3310 3311 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3312 ierr = PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);CHKERRQ(ierr); 3313 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3314 ierr = PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);CHKERRQ(ierr); 3315 ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr); 3316 3317 bi[0] = 0; 3318 bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */ 3319 bdiag_rev[n] = bdiag[0]; 3320 bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */ 3321 for (i=0; i<n; i++) { 3322 /* copy initial fill into linked list */ 3323 nzi = 0; /* nonzeros for active row i */ 3324 nzi = ai[r[i]+1] - ai[r[i]]; 3325 if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 3326 nzi_al = adiag[r[i]] - ai[r[i]]; 3327 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 3328 ajtmp = aj + ai[r[i]]; 3329 ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3330 3331 /* load in initial (unfactored row) */ 3332 aatmp = a->a + ai[r[i]]; 3333 for (j=0; j<nzi; j++) { 3334 rtmp[ics[*ajtmp++]] = *aatmp++; 3335 } 3336 3337 /* add pivot rows into linked list */ 3338 row = lnk[n]; 3339 while (row < i ) { 3340 nzi_bl = bi[row+1] - bi[row] + 1; 3341 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 3342 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 3343 nzi += nlnk; 3344 row = lnk[row]; 3345 } 3346 3347 /* copy data from lnk into jtmp, then initialize lnk */ 3348 ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 3349 3350 /* numerical factorization */ 3351 bjtmp = jtmp; 3352 row = *bjtmp++; /* 1st pivot row */ 3353 while ( row < i ) { 3354 pc = rtmp + row; 3355 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3356 multiplier = (*pc) * (*pv); 3357 *pc = multiplier; 3358 if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */ 3359 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3360 pv = ba + bdiag[row+1] + 1; 3361 /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */ 3362 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3363 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3364 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3365 } 3366 row = *bjtmp++; 3367 } 3368 3369 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3370 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3371 nzi_bl = 0; j = 0; 3372 while (jtmp[j] < i){ /* Note: jtmp is sorted */ 3373 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3374 nzi_bl++; j++; 3375 } 3376 nzi_bu = nzi - nzi_bl -1; 3377 while (j < nzi){ 3378 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3379 j++; 3380 } 3381 3382 bjtmp = bj + bi[i]; 3383 batmp = ba + bi[i]; 3384 /* apply level dropping rule to L part */ 3385 ncut = nzi_al + dtcount; 3386 if (ncut < nzi_bl){ 3387 ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 3388 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 3389 } else { 3390 ncut = nzi_bl; 3391 } 3392 for (j=0; j<ncut; j++){ 3393 bjtmp[j] = jtmp[j]; 3394 batmp[j] = vtmp[j]; 3395 /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */ 3396 } 3397 bi[i+1] = bi[i] + ncut; 3398 nzi = ncut + 1; 3399 3400 /* apply level dropping rule to U part */ 3401 ncut = nzi_au + dtcount; 3402 if (ncut < nzi_bu){ 3403 ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 3404 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 3405 } else { 3406 ncut = nzi_bu; 3407 } 3408 nzi += ncut; 3409 3410 /* mark bdiagonal */ 3411 bdiag[i+1] = bdiag[i] - (ncut + 1); 3412 bdiag_rev[n-i-1] = bdiag[i+1]; 3413 bi[2*n - i] = bi[2*n - i +1] - (ncut + 1); 3414 bjtmp = bj + bdiag[i]; 3415 batmp = ba + bdiag[i]; 3416 *bjtmp = i; 3417 *batmp = diag_tmp; /* rtmp[i]; */ 3418 if (*batmp == 0.0) { 3419 *batmp = dt+shift; 3420 /* printf(" row %d add shift %g\n",i,shift); */ 3421 } 3422 *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */ 3423 /* printf(" (%d,%g),",*bjtmp,*batmp); */ 3424 3425 bjtmp = bj + bdiag[i+1]+1; 3426 batmp = ba + bdiag[i+1]+1; 3427 for (k=0; k<ncut; k++){ 3428 bjtmp[k] = jtmp[nzi_bl+1+k]; 3429 batmp[k] = vtmp[nzi_bl+1+k]; 3430 /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */ 3431 } 3432 /* printf("\n"); */ 3433 3434 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3435 /* 3436 printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]); 3437 printf(" ----------------------------\n"); 3438 */ 3439 } /* for (i=0; i<n; i++) */ 3440 /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */ 3441 if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 3442 3443 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3444 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3445 3446 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3447 ierr = PetscFree2(im,jtmp);CHKERRQ(ierr); 3448 ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr); 3449 ierr = PetscFree(bdiag_rev);CHKERRQ(ierr); 3450 3451 ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 3452 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3453 3454 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3455 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 3456 if (row_identity && icol_identity) { 3457 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3458 } else { 3459 B->ops->solve = MatSolve_SeqAIJ; 3460 } 3461 3462 B->ops->solveadd = 0; 3463 B->ops->solvetranspose = 0; 3464 B->ops->solvetransposeadd = 0; 3465 B->ops->matsolve = 0; 3466 B->assembled = PETSC_TRUE; 3467 B->preallocated = PETSC_TRUE; 3468 PetscFunctionReturn(0); 3469 } 3470 3471 /* a wraper of MatILUDTFactor_SeqAIJ() */ 3472 #undef __FUNCT__ 3473 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ" 3474 /* 3475 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3476 */ 3477 3478 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 3479 { 3480 PetscErrorCode ierr; 3481 3482 PetscFunctionBegin; 3483 ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr); 3484 PetscFunctionReturn(0); 3485 } 3486 3487 /* 3488 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3489 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3490 */ 3491 #undef __FUNCT__ 3492 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ" 3493 /* 3494 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3495 */ 3496 3497 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 3498 { 3499 Mat C=fact; 3500 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 3501 IS isrow = b->row,isicol = b->icol; 3502 PetscErrorCode ierr; 3503 const PetscInt *r,*ic,*ics; 3504 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 3505 PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 3506 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 3507 PetscReal dt=info->dt,shift=info->shiftamount; 3508 PetscBool row_identity, col_identity; 3509 3510 PetscFunctionBegin; 3511 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3512 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3513 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 3514 ics = ic; 3515 3516 for (i=0; i<n; i++){ 3517 /* initialize rtmp array */ 3518 nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 3519 bjtmp = bj + bi[i]; 3520 for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 3521 rtmp[i] = 0.0; 3522 nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 3523 bjtmp = bj + bdiag[i+1] + 1; 3524 for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 3525 3526 /* load in initial unfactored row of A */ 3527 /* printf("row %d\n",i); */ 3528 nz = ai[r[i]+1] - ai[r[i]]; 3529 ajtmp = aj + ai[r[i]]; 3530 v = aa + ai[r[i]]; 3531 for (j=0; j<nz; j++) { 3532 rtmp[ics[*ajtmp++]] = v[j]; 3533 /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */ 3534 } 3535 /* printf("\n"); */ 3536 3537 /* numerical factorization */ 3538 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3539 nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 3540 k = 0; 3541 while (k < nzl){ 3542 row = *bjtmp++; 3543 /* printf(" prow %d\n",row); */ 3544 pc = rtmp + row; 3545 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3546 multiplier = (*pc) * (*pv); 3547 *pc = multiplier; 3548 if (PetscAbsScalar(multiplier) > dt){ 3549 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3550 pv = b->a + bdiag[row+1] + 1; 3551 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3552 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3553 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3554 } 3555 k++; 3556 } 3557 3558 /* finished row so stick it into b->a */ 3559 /* L-part */ 3560 pv = b->a + bi[i] ; 3561 pj = bj + bi[i] ; 3562 nzl = bi[i+1] - bi[i]; 3563 for (j=0; j<nzl; j++) { 3564 pv[j] = rtmp[pj[j]]; 3565 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3566 } 3567 3568 /* diagonal: invert diagonal entries for simplier triangular solves */ 3569 if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 3570 b->a[bdiag[i]] = 1.0/rtmp[i]; 3571 /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */ 3572 3573 /* U-part */ 3574 pv = b->a + bdiag[i+1] + 1; 3575 pj = bj + bdiag[i+1] + 1; 3576 nzu = bdiag[i] - bdiag[i+1] - 1; 3577 for (j=0; j<nzu; j++) { 3578 pv[j] = rtmp[pj[j]]; 3579 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3580 } 3581 /* printf("\n"); */ 3582 } 3583 3584 ierr = PetscFree(rtmp);CHKERRQ(ierr); 3585 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3586 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3587 3588 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3589 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 3590 if (row_identity && col_identity) { 3591 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3592 } else { 3593 C->ops->solve = MatSolve_SeqAIJ; 3594 } 3595 C->ops->solveadd = 0; 3596 C->ops->solvetranspose = 0; 3597 C->ops->solvetransposeadd = 0; 3598 C->ops->matsolve = 0; 3599 C->assembled = PETSC_TRUE; 3600 C->preallocated = PETSC_TRUE; 3601 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 3602 PetscFunctionReturn(0); 3603 } 3604