1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscbt.h> 5 #include <../src/mat/utils/freespace.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ" 9 /* 10 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 11 12 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 13 */ 14 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 15 { 16 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data; 17 PetscErrorCode ierr; 18 PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order; 19 const PetscInt *ai = a->i, *aj = a->j; 20 const PetscScalar *aa = a->a; 21 PetscBool *done; 22 PetscReal best,past = 0,future; 23 24 PetscFunctionBegin; 25 /* pick initial row */ 26 best = -1; 27 for (i=0; i<n; i++) { 28 future = 0.0; 29 for (j=ai[i]; j<ai[i+1]; j++) { 30 if (aj[j] != i) future += PetscAbsScalar(aa[j]); 31 else past = PetscAbsScalar(aa[j]); 32 } 33 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 34 if (past/future > best) { 35 best = past/future; 36 current = i; 37 } 38 } 39 40 ierr = PetscMalloc(n*sizeof(PetscBool),&done);CHKERRQ(ierr); 41 ierr = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr); 42 ierr = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr); 43 order[0] = current; 44 for (i=0; i<n-1; i++) { 45 done[current] = PETSC_TRUE; 46 best = -1; 47 /* loop over all neighbors of current pivot */ 48 for (j=ai[current]; j<ai[current+1]; j++) { 49 jj = aj[j]; 50 if (done[jj]) continue; 51 /* loop over columns of potential next row computing weights for below and above diagonal */ 52 past = future = 0.0; 53 for (k=ai[jj]; k<ai[jj+1]; k++) { 54 kk = aj[k]; 55 if (done[kk]) past += PetscAbsScalar(aa[k]); 56 else if (kk != jj) future += PetscAbsScalar(aa[k]); 57 } 58 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 59 if (past/future > best) { 60 best = past/future; 61 newcurrent = jj; 62 } 63 } 64 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 65 best = -1; 66 for (k=0; k<n; k++) { 67 if (done[k]) continue; 68 future = 0.0; 69 past = 0.0; 70 for (j=ai[k]; j<ai[k+1]; j++) { 71 kk = aj[j]; 72 if (done[kk]) past += PetscAbsScalar(aa[j]); 73 else if (kk != k) future += PetscAbsScalar(aa[j]); 74 } 75 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 76 if (past/future > best) { 77 best = past/future; 78 newcurrent = k; 79 } 80 } 81 } 82 if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current"); 83 current = newcurrent; 84 order[i+1] = current; 85 } 86 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr); 87 *icol = *irow; 88 ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr); 89 ierr = PetscFree(done);CHKERRQ(ierr); 90 ierr = PetscFree(order);CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 96 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool *flg) 97 { 98 PetscFunctionBegin; 99 *flg = PETSC_TRUE; 100 PetscFunctionReturn(0); 101 } 102 103 #undef __FUNCT__ 104 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 105 PETSC_EXTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 106 { 107 PetscInt n = A->rmap->n; 108 PetscErrorCode ierr; 109 110 PetscFunctionBegin; 111 #if defined(PETSC_USE_COMPLEX) 112 if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported"); 113 #endif 114 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 115 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 116 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 117 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 118 119 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 120 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 121 122 ierr = MatSetBlockSizes(*B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 123 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 124 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 125 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 126 127 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 128 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 129 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 130 (*B)->factortype = ftype; 131 PetscFunctionReturn(0); 132 } 133 134 #undef __FUNCT__ 135 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace" 136 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 137 { 138 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 139 IS isicol; 140 PetscErrorCode ierr; 141 const PetscInt *r,*ic; 142 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 143 PetscInt *bi,*bj,*ajtmp; 144 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 145 PetscReal f; 146 PetscInt nlnk,*lnk,k,**bi_ptr; 147 PetscFreeSpaceList free_space=NULL,current_space=NULL; 148 PetscBT lnkbt; 149 150 PetscFunctionBegin; 151 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 152 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 153 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 154 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 155 156 /* get new row pointers */ 157 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 158 bi[0] = 0; 159 160 /* bdiag is location of diagonal in factor */ 161 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 162 bdiag[0] = 0; 163 164 /* linked list for storing column indices of the active row */ 165 nlnk = n + 1; 166 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 167 168 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 169 170 /* initial FreeSpace size is f*(ai[n]+1) */ 171 f = info->fill; 172 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 173 current_space = free_space; 174 175 for (i=0; i<n; i++) { 176 /* copy previous fill into linked list */ 177 nzi = 0; 178 nnz = ai[r[i]+1] - ai[r[i]]; 179 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 180 ajtmp = aj + ai[r[i]]; 181 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 182 nzi += nlnk; 183 184 /* add pivot rows into linked list */ 185 row = lnk[n]; 186 while (row < i) { 187 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 188 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 189 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 190 nzi += nlnk; 191 row = lnk[row]; 192 } 193 bi[i+1] = bi[i] + nzi; 194 im[i] = nzi; 195 196 /* mark bdiag */ 197 nzbd = 0; 198 nnz = nzi; 199 k = lnk[n]; 200 while (nnz-- && k < i) { 201 nzbd++; 202 k = lnk[k]; 203 } 204 bdiag[i] = bi[i] + nzbd; 205 206 /* if free space is not available, make more free space */ 207 if (current_space->local_remaining<nzi) { 208 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 209 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 210 reallocs++; 211 } 212 213 /* copy data into free space, then initialize lnk */ 214 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 215 216 bi_ptr[i] = current_space->array; 217 current_space->array += nzi; 218 current_space->local_used += nzi; 219 current_space->local_remaining -= nzi; 220 } 221 #if defined(PETSC_USE_INFO) 222 if (ai[n] != 0) { 223 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 224 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 225 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 226 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 227 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 228 } else { 229 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 230 } 231 #endif 232 233 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 234 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 235 236 /* destroy list of free space and other temporary array(s) */ 237 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 238 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 239 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 240 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 241 242 /* put together the new matrix */ 243 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 244 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 245 b = (Mat_SeqAIJ*)(B)->data; 246 247 b->free_a = PETSC_TRUE; 248 b->free_ij = PETSC_TRUE; 249 b->singlemalloc = PETSC_FALSE; 250 251 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 252 b->j = bj; 253 b->i = bi; 254 b->diag = bdiag; 255 b->ilen = 0; 256 b->imax = 0; 257 b->row = isrow; 258 b->col = iscol; 259 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 260 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 261 b->icol = isicol; 262 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 263 264 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 265 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 266 b->maxnz = b->nz = bi[n]; 267 268 (B)->factortype = MAT_FACTOR_LU; 269 (B)->info.factor_mallocs = reallocs; 270 (B)->info.fill_ratio_given = f; 271 272 if (ai[n]) { 273 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 274 } else { 275 (B)->info.fill_ratio_needed = 0.0; 276 } 277 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 278 if (a->inode.size) { 279 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 280 } 281 PetscFunctionReturn(0); 282 } 283 284 #undef __FUNCT__ 285 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 286 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 287 { 288 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 289 IS isicol; 290 PetscErrorCode ierr; 291 const PetscInt *r,*ic; 292 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 293 PetscInt *bi,*bj,*ajtmp; 294 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 295 PetscReal f; 296 PetscInt nlnk,*lnk,k,**bi_ptr; 297 PetscFreeSpaceList free_space=NULL,current_space=NULL; 298 PetscBT lnkbt; 299 300 PetscFunctionBegin; 301 /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */ 302 /* 303 PetscBool olddatastruct=PETSC_FALSE; 304 ierr = PetscOptionsGetBool(NULL,"-lu_old",&olddatastruct,NULL);CHKERRQ(ierr); 305 if (olddatastruct) { 306 ierr = MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);CHKERRQ(ierr); 307 PetscFunctionReturn(0); 308 } 309 */ 310 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 311 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 312 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 313 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 314 315 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 316 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 317 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 318 bi[0] = bdiag[0] = 0; 319 320 /* linked list for storing column indices of the active row */ 321 nlnk = n + 1; 322 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 323 324 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 325 326 /* initial FreeSpace size is f*(ai[n]+1) */ 327 f = info->fill; 328 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 329 current_space = free_space; 330 331 for (i=0; i<n; i++) { 332 /* copy previous fill into linked list */ 333 nzi = 0; 334 nnz = ai[r[i]+1] - ai[r[i]]; 335 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 336 ajtmp = aj + ai[r[i]]; 337 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 338 nzi += nlnk; 339 340 /* add pivot rows into linked list */ 341 row = lnk[n]; 342 while (row < i) { 343 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 344 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 345 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 346 nzi += nlnk; 347 row = lnk[row]; 348 } 349 bi[i+1] = bi[i] + nzi; 350 im[i] = nzi; 351 352 /* mark bdiag */ 353 nzbd = 0; 354 nnz = nzi; 355 k = lnk[n]; 356 while (nnz-- && k < i) { 357 nzbd++; 358 k = lnk[k]; 359 } 360 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 361 362 /* if free space is not available, make more free space */ 363 if (current_space->local_remaining<nzi) { 364 nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */ 365 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 366 reallocs++; 367 } 368 369 /* copy data into free space, then initialize lnk */ 370 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 371 372 bi_ptr[i] = current_space->array; 373 current_space->array += nzi; 374 current_space->local_used += nzi; 375 current_space->local_remaining -= nzi; 376 } 377 378 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 379 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 380 381 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 382 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 383 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 384 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 385 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 386 387 /* put together the new matrix */ 388 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 389 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 390 b = (Mat_SeqAIJ*)(B)->data; 391 392 b->free_a = PETSC_TRUE; 393 b->free_ij = PETSC_TRUE; 394 b->singlemalloc = PETSC_FALSE; 395 396 ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 397 398 b->j = bj; 399 b->i = bi; 400 b->diag = bdiag; 401 b->ilen = 0; 402 b->imax = 0; 403 b->row = isrow; 404 b->col = iscol; 405 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 406 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 407 b->icol = isicol; 408 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 409 410 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 411 ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 412 b->maxnz = b->nz = bdiag[0]+1; 413 414 B->factortype = MAT_FACTOR_LU; 415 B->info.factor_mallocs = reallocs; 416 B->info.fill_ratio_given = f; 417 418 if (ai[n]) { 419 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 420 } else { 421 B->info.fill_ratio_needed = 0.0; 422 } 423 #if defined(PETSC_USE_INFO) 424 if (ai[n] != 0) { 425 PetscReal af = B->info.fill_ratio_needed; 426 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 427 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 428 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 429 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 430 } else { 431 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 432 } 433 #endif 434 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 435 if (a->inode.size) { 436 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 437 } 438 PetscFunctionReturn(0); 439 } 440 441 /* 442 Trouble in factorization, should we dump the original matrix? 443 */ 444 #undef __FUNCT__ 445 #define __FUNCT__ "MatFactorDumpMatrix" 446 PetscErrorCode MatFactorDumpMatrix(Mat A) 447 { 448 PetscErrorCode ierr; 449 PetscBool flg = PETSC_FALSE; 450 451 PetscFunctionBegin; 452 ierr = PetscOptionsGetBool(NULL,"-mat_factor_dump_on_error",&flg,NULL);CHKERRQ(ierr); 453 if (flg) { 454 PetscViewer viewer; 455 char filename[PETSC_MAX_PATH_LEN]; 456 457 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 458 ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 459 ierr = MatView(A,viewer);CHKERRQ(ierr); 460 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 461 } 462 PetscFunctionReturn(0); 463 } 464 465 #undef __FUNCT__ 466 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 467 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 468 { 469 Mat C =B; 470 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 471 IS isrow = b->row,isicol = b->icol; 472 PetscErrorCode ierr; 473 const PetscInt *r,*ic,*ics; 474 const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag; 475 PetscInt i,j,k,nz,nzL,row,*pj; 476 const PetscInt *ajtmp,*bjtmp; 477 MatScalar *rtmp,*pc,multiplier,*pv; 478 const MatScalar *aa=a->a,*v; 479 PetscBool row_identity,col_identity; 480 FactorShiftCtx sctx; 481 const PetscInt *ddiag; 482 PetscReal rs; 483 MatScalar d; 484 485 PetscFunctionBegin; 486 /* MatPivotSetUp(): initialize shift context sctx */ 487 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 488 489 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 490 ddiag = a->diag; 491 sctx.shift_top = info->zeropivot; 492 for (i=0; i<n; i++) { 493 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 494 d = (aa)[ddiag[i]]; 495 rs = -PetscAbsScalar(d) - PetscRealPart(d); 496 v = aa+ai[i]; 497 nz = ai[i+1] - ai[i]; 498 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 499 if (rs>sctx.shift_top) sctx.shift_top = rs; 500 } 501 sctx.shift_top *= 1.1; 502 sctx.nshift_max = 5; 503 sctx.shift_lo = 0.; 504 sctx.shift_hi = 1.; 505 } 506 507 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 508 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 509 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 510 ics = ic; 511 512 do { 513 sctx.newshift = PETSC_FALSE; 514 for (i=0; i<n; i++) { 515 /* zero rtmp */ 516 /* L part */ 517 nz = bi[i+1] - bi[i]; 518 bjtmp = bj + bi[i]; 519 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 520 521 /* U part */ 522 nz = bdiag[i]-bdiag[i+1]; 523 bjtmp = bj + bdiag[i+1]+1; 524 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 525 526 /* load in initial (unfactored row) */ 527 nz = ai[r[i]+1] - ai[r[i]]; 528 ajtmp = aj + ai[r[i]]; 529 v = aa + ai[r[i]]; 530 for (j=0; j<nz; j++) { 531 rtmp[ics[ajtmp[j]]] = v[j]; 532 } 533 /* ZeropivotApply() */ 534 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 535 536 /* elimination */ 537 bjtmp = bj + bi[i]; 538 row = *bjtmp++; 539 nzL = bi[i+1] - bi[i]; 540 for (k=0; k < nzL; k++) { 541 pc = rtmp + row; 542 if (*pc != 0.0) { 543 pv = b->a + bdiag[row]; 544 multiplier = *pc * (*pv); 545 *pc = multiplier; 546 547 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 548 pv = b->a + bdiag[row+1]+1; 549 nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */ 550 551 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 552 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 553 } 554 row = *bjtmp++; 555 } 556 557 /* finished row so stick it into b->a */ 558 rs = 0.0; 559 /* L part */ 560 pv = b->a + bi[i]; 561 pj = b->j + bi[i]; 562 nz = bi[i+1] - bi[i]; 563 for (j=0; j<nz; j++) { 564 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 565 } 566 567 /* U part */ 568 pv = b->a + bdiag[i+1]+1; 569 pj = b->j + bdiag[i+1]+1; 570 nz = bdiag[i] - bdiag[i+1]-1; 571 for (j=0; j<nz; j++) { 572 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 573 } 574 575 sctx.rs = rs; 576 sctx.pv = rtmp[i]; 577 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 578 if (sctx.newshift) break; /* break for-loop */ 579 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 580 581 /* Mark diagonal and invert diagonal for simplier triangular solves */ 582 pv = b->a + bdiag[i]; 583 *pv = 1.0/rtmp[i]; 584 585 } /* endof for (i=0; i<n; i++) { */ 586 587 /* MatPivotRefine() */ 588 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 589 /* 590 * if no shift in this attempt & shifting & started shifting & can refine, 591 * then try lower shift 592 */ 593 sctx.shift_hi = sctx.shift_fraction; 594 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 595 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 596 sctx.newshift = PETSC_TRUE; 597 sctx.nshift++; 598 } 599 } while (sctx.newshift); 600 601 ierr = PetscFree(rtmp);CHKERRQ(ierr); 602 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 603 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 604 605 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 606 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 607 if (row_identity && col_identity) { 608 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 609 } else { 610 C->ops->solve = MatSolve_SeqAIJ; 611 } 612 C->ops->solveadd = MatSolveAdd_SeqAIJ; 613 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 614 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 615 C->ops->matsolve = MatMatSolve_SeqAIJ; 616 C->assembled = PETSC_TRUE; 617 C->preallocated = PETSC_TRUE; 618 619 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 620 621 /* MatShiftView(A,info,&sctx) */ 622 if (sctx.nshift) { 623 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 624 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 625 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 626 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 627 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 628 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr); 629 } 630 } 631 ierr = Mat_CheckInode_FactorLU(C,PETSC_FALSE);CHKERRQ(ierr); 632 PetscFunctionReturn(0); 633 } 634 635 #undef __FUNCT__ 636 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace" 637 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 638 { 639 Mat C =B; 640 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 641 IS isrow = b->row,isicol = b->icol; 642 PetscErrorCode ierr; 643 const PetscInt *r,*ic,*ics; 644 PetscInt nz,row,i,j,n=A->rmap->n,diag; 645 const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 646 const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj; 647 MatScalar *pv,*rtmp,*pc,multiplier,d; 648 const MatScalar *v,*aa=a->a; 649 PetscReal rs=0.0; 650 FactorShiftCtx sctx; 651 const PetscInt *ddiag; 652 PetscBool row_identity, col_identity; 653 654 PetscFunctionBegin; 655 /* MatPivotSetUp(): initialize shift context sctx */ 656 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 657 658 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 659 ddiag = a->diag; 660 sctx.shift_top = info->zeropivot; 661 for (i=0; i<n; i++) { 662 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 663 d = (aa)[ddiag[i]]; 664 rs = -PetscAbsScalar(d) - PetscRealPart(d); 665 v = aa+ai[i]; 666 nz = ai[i+1] - ai[i]; 667 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 668 if (rs>sctx.shift_top) sctx.shift_top = rs; 669 } 670 sctx.shift_top *= 1.1; 671 sctx.nshift_max = 5; 672 sctx.shift_lo = 0.; 673 sctx.shift_hi = 1.; 674 } 675 676 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 677 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 678 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 679 ics = ic; 680 681 do { 682 sctx.newshift = PETSC_FALSE; 683 for (i=0; i<n; i++) { 684 nz = bi[i+1] - bi[i]; 685 bjtmp = bj + bi[i]; 686 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 687 688 /* load in initial (unfactored row) */ 689 nz = ai[r[i]+1] - ai[r[i]]; 690 ajtmp = aj + ai[r[i]]; 691 v = aa + ai[r[i]]; 692 for (j=0; j<nz; j++) { 693 rtmp[ics[ajtmp[j]]] = v[j]; 694 } 695 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 696 697 row = *bjtmp++; 698 while (row < i) { 699 pc = rtmp + row; 700 if (*pc != 0.0) { 701 pv = b->a + diag_offset[row]; 702 pj = b->j + diag_offset[row] + 1; 703 multiplier = *pc / *pv++; 704 *pc = multiplier; 705 nz = bi[row+1] - diag_offset[row] - 1; 706 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 707 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 708 } 709 row = *bjtmp++; 710 } 711 /* finished row so stick it into b->a */ 712 pv = b->a + bi[i]; 713 pj = b->j + bi[i]; 714 nz = bi[i+1] - bi[i]; 715 diag = diag_offset[i] - bi[i]; 716 rs = 0.0; 717 for (j=0; j<nz; j++) { 718 pv[j] = rtmp[pj[j]]; 719 rs += PetscAbsScalar(pv[j]); 720 } 721 rs -= PetscAbsScalar(pv[diag]); 722 723 sctx.rs = rs; 724 sctx.pv = pv[diag]; 725 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 726 if (sctx.newshift) break; 727 pv[diag] = sctx.pv; 728 } 729 730 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 731 /* 732 * if no shift in this attempt & shifting & started shifting & can refine, 733 * then try lower shift 734 */ 735 sctx.shift_hi = sctx.shift_fraction; 736 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 737 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 738 sctx.newshift = PETSC_TRUE; 739 sctx.nshift++; 740 } 741 } while (sctx.newshift); 742 743 /* invert diagonal entries for simplier triangular solves */ 744 for (i=0; i<n; i++) { 745 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 746 } 747 ierr = PetscFree(rtmp);CHKERRQ(ierr); 748 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 749 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 750 751 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 752 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 753 if (row_identity && col_identity) { 754 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 755 } else { 756 C->ops->solve = MatSolve_SeqAIJ_inplace; 757 } 758 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 759 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 760 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 761 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 762 763 C->assembled = PETSC_TRUE; 764 C->preallocated = PETSC_TRUE; 765 766 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 767 if (sctx.nshift) { 768 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 769 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 770 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 771 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 772 } 773 } 774 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 775 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 776 777 ierr = Mat_CheckInode(C,PETSC_FALSE);CHKERRQ(ierr); 778 PetscFunctionReturn(0); 779 } 780 781 /* 782 This routine implements inplace ILU(0) with row or/and column permutations. 783 Input: 784 A - original matrix 785 Output; 786 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 787 a->j (col index) is permuted by the inverse of colperm, then sorted 788 a->a reordered accordingly with a->j 789 a->diag (ptr to diagonal elements) is updated. 790 */ 791 #undef __FUNCT__ 792 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 793 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 794 { 795 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data; 796 IS isrow = a->row,isicol = a->icol; 797 PetscErrorCode ierr; 798 const PetscInt *r,*ic,*ics; 799 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 800 PetscInt *ajtmp,nz,row; 801 PetscInt *diag = a->diag,nbdiag,*pj; 802 PetscScalar *rtmp,*pc,multiplier,d; 803 MatScalar *pv,*v; 804 PetscReal rs; 805 FactorShiftCtx sctx; 806 const MatScalar *aa=a->a,*vtmp; 807 808 PetscFunctionBegin; 809 if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 810 811 /* MatPivotSetUp(): initialize shift context sctx */ 812 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 813 814 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 815 const PetscInt *ddiag = a->diag; 816 sctx.shift_top = info->zeropivot; 817 for (i=0; i<n; i++) { 818 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 819 d = (aa)[ddiag[i]]; 820 rs = -PetscAbsScalar(d) - PetscRealPart(d); 821 vtmp = aa+ai[i]; 822 nz = ai[i+1] - ai[i]; 823 for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]); 824 if (rs>sctx.shift_top) sctx.shift_top = rs; 825 } 826 sctx.shift_top *= 1.1; 827 sctx.nshift_max = 5; 828 sctx.shift_lo = 0.; 829 sctx.shift_hi = 1.; 830 } 831 832 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 833 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 834 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 835 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 836 ics = ic; 837 838 #if defined(MV) 839 sctx.shift_top = 0.; 840 sctx.nshift_max = 0; 841 sctx.shift_lo = 0.; 842 sctx.shift_hi = 0.; 843 sctx.shift_fraction = 0.; 844 845 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 846 sctx.shift_top = 0.; 847 for (i=0; i<n; i++) { 848 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 849 d = (a->a)[diag[i]]; 850 rs = -PetscAbsScalar(d) - PetscRealPart(d); 851 v = a->a+ai[i]; 852 nz = ai[i+1] - ai[i]; 853 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 854 if (rs>sctx.shift_top) sctx.shift_top = rs; 855 } 856 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 857 sctx.shift_top *= 1.1; 858 sctx.nshift_max = 5; 859 sctx.shift_lo = 0.; 860 sctx.shift_hi = 1.; 861 } 862 863 sctx.shift_amount = 0.; 864 sctx.nshift = 0; 865 #endif 866 867 do { 868 sctx.newshift = PETSC_FALSE; 869 for (i=0; i<n; i++) { 870 /* load in initial unfactored row */ 871 nz = ai[r[i]+1] - ai[r[i]]; 872 ajtmp = aj + ai[r[i]]; 873 v = a->a + ai[r[i]]; 874 /* sort permuted ajtmp and values v accordingly */ 875 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 876 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 877 878 diag[r[i]] = ai[r[i]]; 879 for (j=0; j<nz; j++) { 880 rtmp[ajtmp[j]] = v[j]; 881 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 882 } 883 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 884 885 row = *ajtmp++; 886 while (row < i) { 887 pc = rtmp + row; 888 if (*pc != 0.0) { 889 pv = a->a + diag[r[row]]; 890 pj = aj + diag[r[row]] + 1; 891 892 multiplier = *pc / *pv++; 893 *pc = multiplier; 894 nz = ai[r[row]+1] - diag[r[row]] - 1; 895 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 896 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 897 } 898 row = *ajtmp++; 899 } 900 /* finished row so overwrite it onto a->a */ 901 pv = a->a + ai[r[i]]; 902 pj = aj + ai[r[i]]; 903 nz = ai[r[i]+1] - ai[r[i]]; 904 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 905 906 rs = 0.0; 907 for (j=0; j<nz; j++) { 908 pv[j] = rtmp[pj[j]]; 909 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 910 } 911 912 sctx.rs = rs; 913 sctx.pv = pv[nbdiag]; 914 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 915 if (sctx.newshift) break; 916 pv[nbdiag] = sctx.pv; 917 } 918 919 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 920 /* 921 * if no shift in this attempt & shifting & started shifting & can refine, 922 * then try lower shift 923 */ 924 sctx.shift_hi = sctx.shift_fraction; 925 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 926 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 927 sctx.newshift = PETSC_TRUE; 928 sctx.nshift++; 929 } 930 } while (sctx.newshift); 931 932 /* invert diagonal entries for simplier triangular solves */ 933 for (i=0; i<n; i++) { 934 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 935 } 936 937 ierr = PetscFree(rtmp);CHKERRQ(ierr); 938 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 939 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 940 941 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 942 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 943 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 944 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 945 946 A->assembled = PETSC_TRUE; 947 A->preallocated = PETSC_TRUE; 948 949 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 950 if (sctx.nshift) { 951 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 952 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 953 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 954 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 955 } 956 } 957 PetscFunctionReturn(0); 958 } 959 960 /* ----------------------------------------------------------- */ 961 #undef __FUNCT__ 962 #define __FUNCT__ "MatLUFactor_SeqAIJ" 963 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 964 { 965 PetscErrorCode ierr; 966 Mat C; 967 968 PetscFunctionBegin; 969 ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 970 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 971 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 972 973 A->ops->solve = C->ops->solve; 974 A->ops->solvetranspose = C->ops->solvetranspose; 975 976 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 977 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 978 PetscFunctionReturn(0); 979 } 980 /* ----------------------------------------------------------- */ 981 982 983 #undef __FUNCT__ 984 #define __FUNCT__ "MatSolve_SeqAIJ_inplace" 985 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 986 { 987 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 988 IS iscol = a->col,isrow = a->row; 989 PetscErrorCode ierr; 990 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 991 PetscInt nz; 992 const PetscInt *rout,*cout,*r,*c; 993 PetscScalar *x,*tmp,*tmps,sum; 994 const PetscScalar *b; 995 const MatScalar *aa = a->a,*v; 996 997 PetscFunctionBegin; 998 if (!n) PetscFunctionReturn(0); 999 1000 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1001 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1002 tmp = a->solve_work; 1003 1004 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1005 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1006 1007 /* forward solve the lower triangular */ 1008 tmp[0] = b[*r++]; 1009 tmps = tmp; 1010 for (i=1; i<n; i++) { 1011 v = aa + ai[i]; 1012 vi = aj + ai[i]; 1013 nz = a->diag[i] - ai[i]; 1014 sum = b[*r++]; 1015 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1016 tmp[i] = sum; 1017 } 1018 1019 /* backward solve the upper triangular */ 1020 for (i=n-1; i>=0; i--) { 1021 v = aa + a->diag[i] + 1; 1022 vi = aj + a->diag[i] + 1; 1023 nz = ai[i+1] - a->diag[i] - 1; 1024 sum = tmp[i]; 1025 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1026 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 1027 } 1028 1029 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1030 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1031 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1032 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1033 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1034 PetscFunctionReturn(0); 1035 } 1036 1037 #undef __FUNCT__ 1038 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace" 1039 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X) 1040 { 1041 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1042 IS iscol = a->col,isrow = a->row; 1043 PetscErrorCode ierr; 1044 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1045 PetscInt nz,neq; 1046 const PetscInt *rout,*cout,*r,*c; 1047 PetscScalar *x,*b,*tmp,*tmps,sum; 1048 const MatScalar *aa = a->a,*v; 1049 PetscBool bisdense,xisdense; 1050 1051 PetscFunctionBegin; 1052 if (!n) PetscFunctionReturn(0); 1053 1054 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1055 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1056 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1057 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1058 1059 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 1060 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1061 1062 tmp = a->solve_work; 1063 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1064 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1065 1066 for (neq=0; neq<B->cmap->n; neq++) { 1067 /* forward solve the lower triangular */ 1068 tmp[0] = b[r[0]]; 1069 tmps = tmp; 1070 for (i=1; i<n; i++) { 1071 v = aa + ai[i]; 1072 vi = aj + ai[i]; 1073 nz = a->diag[i] - ai[i]; 1074 sum = b[r[i]]; 1075 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1076 tmp[i] = sum; 1077 } 1078 /* backward solve the upper triangular */ 1079 for (i=n-1; i>=0; i--) { 1080 v = aa + a->diag[i] + 1; 1081 vi = aj + a->diag[i] + 1; 1082 nz = ai[i+1] - a->diag[i] - 1; 1083 sum = tmp[i]; 1084 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1085 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 1086 } 1087 1088 b += n; 1089 x += n; 1090 } 1091 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1092 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1093 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 1094 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1095 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNCT__ 1100 #define __FUNCT__ "MatMatSolve_SeqAIJ" 1101 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 1102 { 1103 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1104 IS iscol = a->col,isrow = a->row; 1105 PetscErrorCode ierr; 1106 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1107 PetscInt nz,neq; 1108 const PetscInt *rout,*cout,*r,*c; 1109 PetscScalar *x,*b,*tmp,sum; 1110 const MatScalar *aa = a->a,*v; 1111 PetscBool bisdense,xisdense; 1112 1113 PetscFunctionBegin; 1114 if (!n) PetscFunctionReturn(0); 1115 1116 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1117 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1118 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1119 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1120 1121 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 1122 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1123 1124 tmp = a->solve_work; 1125 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1126 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1127 1128 for (neq=0; neq<B->cmap->n; neq++) { 1129 /* forward solve the lower triangular */ 1130 tmp[0] = b[r[0]]; 1131 v = aa; 1132 vi = aj; 1133 for (i=1; i<n; i++) { 1134 nz = ai[i+1] - ai[i]; 1135 sum = b[r[i]]; 1136 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1137 tmp[i] = sum; 1138 v += nz; vi += nz; 1139 } 1140 1141 /* backward solve the upper triangular */ 1142 for (i=n-1; i>=0; i--) { 1143 v = aa + adiag[i+1]+1; 1144 vi = aj + adiag[i+1]+1; 1145 nz = adiag[i]-adiag[i+1]-1; 1146 sum = tmp[i]; 1147 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1148 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 1149 } 1150 1151 b += n; 1152 x += n; 1153 } 1154 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1155 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1156 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 1157 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1158 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1159 PetscFunctionReturn(0); 1160 } 1161 1162 #undef __FUNCT__ 1163 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 1164 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 1165 { 1166 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1167 IS iscol = a->col,isrow = a->row; 1168 PetscErrorCode ierr; 1169 const PetscInt *r,*c,*rout,*cout; 1170 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1171 PetscInt nz,row; 1172 PetscScalar *x,*b,*tmp,*tmps,sum; 1173 const MatScalar *aa = a->a,*v; 1174 1175 PetscFunctionBegin; 1176 if (!n) PetscFunctionReturn(0); 1177 1178 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1179 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1180 tmp = a->solve_work; 1181 1182 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1183 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1184 1185 /* forward solve the lower triangular */ 1186 tmp[0] = b[*r++]; 1187 tmps = tmp; 1188 for (row=1; row<n; row++) { 1189 i = rout[row]; /* permuted row */ 1190 v = aa + ai[i]; 1191 vi = aj + ai[i]; 1192 nz = a->diag[i] - ai[i]; 1193 sum = b[*r++]; 1194 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1195 tmp[row] = sum; 1196 } 1197 1198 /* backward solve the upper triangular */ 1199 for (row=n-1; row>=0; row--) { 1200 i = rout[row]; /* permuted row */ 1201 v = aa + a->diag[i] + 1; 1202 vi = aj + a->diag[i] + 1; 1203 nz = ai[i+1] - a->diag[i] - 1; 1204 sum = tmp[row]; 1205 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1206 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 1207 } 1208 1209 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1210 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1211 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1212 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1213 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1214 PetscFunctionReturn(0); 1215 } 1216 1217 /* ----------------------------------------------------------- */ 1218 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1219 #undef __FUNCT__ 1220 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace" 1221 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1222 { 1223 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1224 PetscErrorCode ierr; 1225 PetscInt n = A->rmap->n; 1226 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag; 1227 PetscScalar *x; 1228 const PetscScalar *b; 1229 const MatScalar *aa = a->a; 1230 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1231 PetscInt adiag_i,i,nz,ai_i; 1232 const PetscInt *vi; 1233 const MatScalar *v; 1234 PetscScalar sum; 1235 #endif 1236 1237 PetscFunctionBegin; 1238 if (!n) PetscFunctionReturn(0); 1239 1240 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1241 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1242 1243 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1244 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 1245 #else 1246 /* forward solve the lower triangular */ 1247 x[0] = b[0]; 1248 for (i=1; i<n; i++) { 1249 ai_i = ai[i]; 1250 v = aa + ai_i; 1251 vi = aj + ai_i; 1252 nz = adiag[i] - ai_i; 1253 sum = b[i]; 1254 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1255 x[i] = sum; 1256 } 1257 1258 /* backward solve the upper triangular */ 1259 for (i=n-1; i>=0; i--) { 1260 adiag_i = adiag[i]; 1261 v = aa + adiag_i + 1; 1262 vi = aj + adiag_i + 1; 1263 nz = ai[i+1] - adiag_i - 1; 1264 sum = x[i]; 1265 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1266 x[i] = sum*aa[adiag_i]; 1267 } 1268 #endif 1269 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1270 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1271 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1272 PetscFunctionReturn(0); 1273 } 1274 1275 #undef __FUNCT__ 1276 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace" 1277 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx) 1278 { 1279 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1280 IS iscol = a->col,isrow = a->row; 1281 PetscErrorCode ierr; 1282 PetscInt i, n = A->rmap->n,j; 1283 PetscInt nz; 1284 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j; 1285 PetscScalar *x,*tmp,sum; 1286 const PetscScalar *b; 1287 const MatScalar *aa = a->a,*v; 1288 1289 PetscFunctionBegin; 1290 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1291 1292 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1293 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1294 tmp = a->solve_work; 1295 1296 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1297 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1298 1299 /* forward solve the lower triangular */ 1300 tmp[0] = b[*r++]; 1301 for (i=1; i<n; i++) { 1302 v = aa + ai[i]; 1303 vi = aj + ai[i]; 1304 nz = a->diag[i] - ai[i]; 1305 sum = b[*r++]; 1306 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1307 tmp[i] = sum; 1308 } 1309 1310 /* backward solve the upper triangular */ 1311 for (i=n-1; i>=0; i--) { 1312 v = aa + a->diag[i] + 1; 1313 vi = aj + a->diag[i] + 1; 1314 nz = ai[i+1] - a->diag[i] - 1; 1315 sum = tmp[i]; 1316 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1317 tmp[i] = sum*aa[a->diag[i]]; 1318 x[*c--] += tmp[i]; 1319 } 1320 1321 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1322 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1323 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1324 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1325 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1326 PetscFunctionReturn(0); 1327 } 1328 1329 #undef __FUNCT__ 1330 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1331 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1332 { 1333 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1334 IS iscol = a->col,isrow = a->row; 1335 PetscErrorCode ierr; 1336 PetscInt i, n = A->rmap->n,j; 1337 PetscInt nz; 1338 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1339 PetscScalar *x,*tmp,sum; 1340 const PetscScalar *b; 1341 const MatScalar *aa = a->a,*v; 1342 1343 PetscFunctionBegin; 1344 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1345 1346 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1347 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1348 tmp = a->solve_work; 1349 1350 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1351 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1352 1353 /* forward solve the lower triangular */ 1354 tmp[0] = b[r[0]]; 1355 v = aa; 1356 vi = aj; 1357 for (i=1; i<n; i++) { 1358 nz = ai[i+1] - ai[i]; 1359 sum = b[r[i]]; 1360 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1361 tmp[i] = sum; 1362 v += nz; 1363 vi += nz; 1364 } 1365 1366 /* backward solve the upper triangular */ 1367 v = aa + adiag[n-1]; 1368 vi = aj + adiag[n-1]; 1369 for (i=n-1; i>=0; i--) { 1370 nz = adiag[i] - adiag[i+1] - 1; 1371 sum = tmp[i]; 1372 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1373 tmp[i] = sum*v[nz]; 1374 x[c[i]] += tmp[i]; 1375 v += nz+1; vi += nz+1; 1376 } 1377 1378 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1379 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1380 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1381 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1382 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1383 PetscFunctionReturn(0); 1384 } 1385 1386 #undef __FUNCT__ 1387 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace" 1388 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 1389 { 1390 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1391 IS iscol = a->col,isrow = a->row; 1392 PetscErrorCode ierr; 1393 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1394 PetscInt i,n = A->rmap->n,j; 1395 PetscInt nz; 1396 PetscScalar *x,*tmp,s1; 1397 const MatScalar *aa = a->a,*v; 1398 const PetscScalar *b; 1399 1400 PetscFunctionBegin; 1401 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1402 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1403 tmp = a->solve_work; 1404 1405 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1406 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1407 1408 /* copy the b into temp work space according to permutation */ 1409 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1410 1411 /* forward solve the U^T */ 1412 for (i=0; i<n; i++) { 1413 v = aa + diag[i]; 1414 vi = aj + diag[i] + 1; 1415 nz = ai[i+1] - diag[i] - 1; 1416 s1 = tmp[i]; 1417 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1418 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1419 tmp[i] = s1; 1420 } 1421 1422 /* backward solve the L^T */ 1423 for (i=n-1; i>=0; i--) { 1424 v = aa + diag[i] - 1; 1425 vi = aj + diag[i] - 1; 1426 nz = diag[i] - ai[i]; 1427 s1 = tmp[i]; 1428 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1429 } 1430 1431 /* copy tmp into x according to permutation */ 1432 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1433 1434 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1435 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1436 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1437 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1438 1439 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1440 PetscFunctionReturn(0); 1441 } 1442 1443 #undef __FUNCT__ 1444 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1445 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1446 { 1447 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1448 IS iscol = a->col,isrow = a->row; 1449 PetscErrorCode ierr; 1450 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1451 PetscInt i,n = A->rmap->n,j; 1452 PetscInt nz; 1453 PetscScalar *x,*tmp,s1; 1454 const MatScalar *aa = a->a,*v; 1455 const PetscScalar *b; 1456 1457 PetscFunctionBegin; 1458 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1459 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1460 tmp = a->solve_work; 1461 1462 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1463 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1464 1465 /* copy the b into temp work space according to permutation */ 1466 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1467 1468 /* forward solve the U^T */ 1469 for (i=0; i<n; i++) { 1470 v = aa + adiag[i+1] + 1; 1471 vi = aj + adiag[i+1] + 1; 1472 nz = adiag[i] - adiag[i+1] - 1; 1473 s1 = tmp[i]; 1474 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1475 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1476 tmp[i] = s1; 1477 } 1478 1479 /* backward solve the L^T */ 1480 for (i=n-1; i>=0; i--) { 1481 v = aa + ai[i]; 1482 vi = aj + ai[i]; 1483 nz = ai[i+1] - ai[i]; 1484 s1 = tmp[i]; 1485 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1486 } 1487 1488 /* copy tmp into x according to permutation */ 1489 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1490 1491 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1492 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1493 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1494 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1495 1496 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1497 PetscFunctionReturn(0); 1498 } 1499 1500 #undef __FUNCT__ 1501 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace" 1502 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx) 1503 { 1504 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1505 IS iscol = a->col,isrow = a->row; 1506 PetscErrorCode ierr; 1507 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1508 PetscInt i,n = A->rmap->n,j; 1509 PetscInt nz; 1510 PetscScalar *x,*tmp,s1; 1511 const MatScalar *aa = a->a,*v; 1512 const PetscScalar *b; 1513 1514 PetscFunctionBegin; 1515 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1516 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1517 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1518 tmp = a->solve_work; 1519 1520 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1521 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1522 1523 /* copy the b into temp work space according to permutation */ 1524 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1525 1526 /* forward solve the U^T */ 1527 for (i=0; i<n; i++) { 1528 v = aa + diag[i]; 1529 vi = aj + diag[i] + 1; 1530 nz = ai[i+1] - diag[i] - 1; 1531 s1 = tmp[i]; 1532 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1533 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1534 tmp[i] = s1; 1535 } 1536 1537 /* backward solve the L^T */ 1538 for (i=n-1; i>=0; i--) { 1539 v = aa + diag[i] - 1; 1540 vi = aj + diag[i] - 1; 1541 nz = diag[i] - ai[i]; 1542 s1 = tmp[i]; 1543 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1544 } 1545 1546 /* copy tmp into x according to permutation */ 1547 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1548 1549 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1550 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1551 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1552 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1553 1554 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1555 PetscFunctionReturn(0); 1556 } 1557 1558 #undef __FUNCT__ 1559 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1560 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1561 { 1562 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1563 IS iscol = a->col,isrow = a->row; 1564 PetscErrorCode ierr; 1565 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1566 PetscInt i,n = A->rmap->n,j; 1567 PetscInt nz; 1568 PetscScalar *x,*tmp,s1; 1569 const MatScalar *aa = a->a,*v; 1570 const PetscScalar *b; 1571 1572 PetscFunctionBegin; 1573 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1574 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1575 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1576 tmp = a->solve_work; 1577 1578 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1579 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1580 1581 /* copy the b into temp work space according to permutation */ 1582 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1583 1584 /* forward solve the U^T */ 1585 for (i=0; i<n; i++) { 1586 v = aa + adiag[i+1] + 1; 1587 vi = aj + adiag[i+1] + 1; 1588 nz = adiag[i] - adiag[i+1] - 1; 1589 s1 = tmp[i]; 1590 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1591 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1592 tmp[i] = s1; 1593 } 1594 1595 1596 /* backward solve the L^T */ 1597 for (i=n-1; i>=0; i--) { 1598 v = aa + ai[i]; 1599 vi = aj + ai[i]; 1600 nz = ai[i+1] - ai[i]; 1601 s1 = tmp[i]; 1602 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1603 } 1604 1605 /* copy tmp into x according to permutation */ 1606 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1607 1608 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1609 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1610 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1611 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1612 1613 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 /* ----------------------------------------------------------------*/ 1618 1619 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool); 1620 1621 /* 1622 ilu() under revised new data structure. 1623 Factored arrays bj and ba are stored as 1624 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1625 1626 bi=fact->i is an array of size n+1, in which 1627 bi+ 1628 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1629 bi[n]: points to L(n-1,n-1)+1 1630 1631 bdiag=fact->diag is an array of size n+1,in which 1632 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1633 bdiag[n]: points to entry of U(n-1,0)-1 1634 1635 U(i,:) contains bdiag[i] as its last entry, i.e., 1636 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1637 */ 1638 #undef __FUNCT__ 1639 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0" 1640 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1641 { 1642 1643 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1644 PetscErrorCode ierr; 1645 const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1646 PetscInt i,j,k=0,nz,*bi,*bj,*bdiag; 1647 PetscBool missing; 1648 IS isicol; 1649 1650 PetscFunctionBegin; 1651 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1652 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1653 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1654 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1655 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 1656 b = (Mat_SeqAIJ*)(fact)->data; 1657 1658 /* allocate matrix arrays for new data structure */ 1659 ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);CHKERRQ(ierr); 1660 ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1661 1662 b->singlemalloc = PETSC_TRUE; 1663 if (!b->diag) { 1664 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr); 1665 ierr = PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1666 } 1667 bdiag = b->diag; 1668 1669 if (n > 0) { 1670 ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr); 1671 } 1672 1673 /* set bi and bj with new data structure */ 1674 bi = b->i; 1675 bj = b->j; 1676 1677 /* L part */ 1678 bi[0] = 0; 1679 for (i=0; i<n; i++) { 1680 nz = adiag[i] - ai[i]; 1681 bi[i+1] = bi[i] + nz; 1682 aj = a->j + ai[i]; 1683 for (j=0; j<nz; j++) { 1684 /* *bj = aj[j]; bj++; */ 1685 bj[k++] = aj[j]; 1686 } 1687 } 1688 1689 /* U part */ 1690 bdiag[n] = bi[n]-1; 1691 for (i=n-1; i>=0; i--) { 1692 nz = ai[i+1] - adiag[i] - 1; 1693 aj = a->j + adiag[i] + 1; 1694 for (j=0; j<nz; j++) { 1695 /* *bj = aj[j]; bj++; */ 1696 bj[k++] = aj[j]; 1697 } 1698 /* diag[i] */ 1699 /* *bj = i; bj++; */ 1700 bj[k++] = i; 1701 bdiag[i] = bdiag[i+1] + nz + 1; 1702 } 1703 1704 fact->factortype = MAT_FACTOR_ILU; 1705 fact->info.factor_mallocs = 0; 1706 fact->info.fill_ratio_given = info->fill; 1707 fact->info.fill_ratio_needed = 1.0; 1708 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1709 1710 b = (Mat_SeqAIJ*)(fact)->data; 1711 b->row = isrow; 1712 b->col = iscol; 1713 b->icol = isicol; 1714 ierr = PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1715 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1716 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1717 PetscFunctionReturn(0); 1718 } 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1722 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1723 { 1724 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1725 IS isicol; 1726 PetscErrorCode ierr; 1727 const PetscInt *r,*ic; 1728 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1729 PetscInt *bi,*cols,nnz,*cols_lvl; 1730 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1731 PetscInt i,levels,diagonal_fill; 1732 PetscBool col_identity,row_identity; 1733 PetscReal f; 1734 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 1735 PetscBT lnkbt; 1736 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1737 PetscFreeSpaceList free_space =NULL,current_space=NULL; 1738 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 1739 1740 PetscFunctionBegin; 1741 /* Uncomment the old data struct part only while testing new data structure for MatSolve() */ 1742 /* 1743 PetscBool olddatastruct=PETSC_FALSE; 1744 ierr = PetscOptionsGetBool(NULL,"-ilu_old",&olddatastruct,NULL);CHKERRQ(ierr); 1745 if (olddatastruct) { 1746 ierr = MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1747 PetscFunctionReturn(0); 1748 } 1749 */ 1750 1751 levels = (PetscInt)info->levels; 1752 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1753 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1754 if (!levels && row_identity && col_identity) { 1755 /* special case: ilu(0) with natural ordering */ 1756 ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1757 if (a->inode.size) { 1758 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1759 } 1760 PetscFunctionReturn(0); 1761 } 1762 1763 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1764 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1765 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1766 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1767 1768 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1769 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1770 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1771 bi[0] = bdiag[0] = 0; 1772 ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr); 1773 1774 /* create a linked list for storing column indices of the active row */ 1775 nlnk = n + 1; 1776 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1777 1778 /* initial FreeSpace size is f*(ai[n]+1) */ 1779 f = info->fill; 1780 diagonal_fill = (PetscInt)info->diagonal_fill; 1781 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1782 current_space = free_space; 1783 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1784 current_space_lvl = free_space_lvl; 1785 for (i=0; i<n; i++) { 1786 nzi = 0; 1787 /* copy current row into linked list */ 1788 nnz = ai[r[i]+1] - ai[r[i]]; 1789 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1790 cols = aj + ai[r[i]]; 1791 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1792 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1793 nzi += nlnk; 1794 1795 /* make sure diagonal entry is included */ 1796 if (diagonal_fill && lnk[i] == -1) { 1797 fm = n; 1798 while (lnk[fm] < i) fm = lnk[fm]; 1799 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1800 lnk[fm] = i; 1801 lnk_lvl[i] = 0; 1802 nzi++; dcount++; 1803 } 1804 1805 /* add pivot rows into the active row */ 1806 nzbd = 0; 1807 prow = lnk[n]; 1808 while (prow < i) { 1809 nnz = bdiag[prow]; 1810 cols = bj_ptr[prow] + nnz + 1; 1811 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1812 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1813 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1814 nzi += nlnk; 1815 prow = lnk[prow]; 1816 nzbd++; 1817 } 1818 bdiag[i] = nzbd; 1819 bi[i+1] = bi[i] + nzi; 1820 /* if free space is not available, make more free space */ 1821 if (current_space->local_remaining<nzi) { 1822 nnz = 2*nzi*(n - i); /* estimated and max additional space needed */ 1823 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1824 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1825 reallocs++; 1826 } 1827 1828 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1829 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1830 bj_ptr[i] = current_space->array; 1831 bjlvl_ptr[i] = current_space_lvl->array; 1832 1833 /* make sure the active row i has diagonal entry */ 1834 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1835 1836 current_space->array += nzi; 1837 current_space->local_used += nzi; 1838 current_space->local_remaining -= nzi; 1839 current_space_lvl->array += nzi; 1840 current_space_lvl->local_used += nzi; 1841 current_space_lvl->local_remaining -= nzi; 1842 } 1843 1844 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1845 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1846 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1847 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1848 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 1849 1850 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1851 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1852 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 1853 1854 #if defined(PETSC_USE_INFO) 1855 { 1856 PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1857 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1858 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1859 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1860 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1861 if (diagonal_fill) { 1862 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1863 } 1864 } 1865 #endif 1866 /* put together the new matrix */ 1867 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 1868 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 1869 b = (Mat_SeqAIJ*)(fact)->data; 1870 1871 b->free_a = PETSC_TRUE; 1872 b->free_ij = PETSC_TRUE; 1873 b->singlemalloc = PETSC_FALSE; 1874 1875 ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 1876 1877 b->j = bj; 1878 b->i = bi; 1879 b->diag = bdiag; 1880 b->ilen = 0; 1881 b->imax = 0; 1882 b->row = isrow; 1883 b->col = iscol; 1884 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1885 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1886 b->icol = isicol; 1887 1888 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1889 /* In b structure: Free imax, ilen, old a, old j. 1890 Allocate bdiag, solve_work, new a, new j */ 1891 ierr = PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1892 b->maxnz = b->nz = bdiag[0]+1; 1893 1894 (fact)->info.factor_mallocs = reallocs; 1895 (fact)->info.fill_ratio_given = f; 1896 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1897 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1898 if (a->inode.size) { 1899 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1900 } 1901 PetscFunctionReturn(0); 1902 } 1903 1904 #undef __FUNCT__ 1905 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace" 1906 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1907 { 1908 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1909 IS isicol; 1910 PetscErrorCode ierr; 1911 const PetscInt *r,*ic; 1912 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1913 PetscInt *bi,*cols,nnz,*cols_lvl; 1914 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1915 PetscInt i,levels,diagonal_fill; 1916 PetscBool col_identity,row_identity; 1917 PetscReal f; 1918 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 1919 PetscBT lnkbt; 1920 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1921 PetscFreeSpaceList free_space =NULL,current_space=NULL; 1922 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 1923 PetscBool missing; 1924 1925 PetscFunctionBegin; 1926 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1927 f = info->fill; 1928 levels = (PetscInt)info->levels; 1929 diagonal_fill = (PetscInt)info->diagonal_fill; 1930 1931 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1932 1933 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1934 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1935 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1936 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 1937 1938 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1939 if (a->inode.size) { 1940 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1941 } 1942 fact->factortype = MAT_FACTOR_ILU; 1943 (fact)->info.factor_mallocs = 0; 1944 (fact)->info.fill_ratio_given = info->fill; 1945 (fact)->info.fill_ratio_needed = 1.0; 1946 1947 b = (Mat_SeqAIJ*)(fact)->data; 1948 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1949 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1950 b->row = isrow; 1951 b->col = iscol; 1952 b->icol = isicol; 1953 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1954 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1955 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1956 PetscFunctionReturn(0); 1957 } 1958 1959 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1960 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1961 1962 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1963 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1964 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1965 bi[0] = bdiag[0] = 0; 1966 1967 ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr); 1968 1969 /* create a linked list for storing column indices of the active row */ 1970 nlnk = n + 1; 1971 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1972 1973 /* initial FreeSpace size is f*(ai[n]+1) */ 1974 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1975 current_space = free_space; 1976 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1977 current_space_lvl = free_space_lvl; 1978 1979 for (i=0; i<n; i++) { 1980 nzi = 0; 1981 /* copy current row into linked list */ 1982 nnz = ai[r[i]+1] - ai[r[i]]; 1983 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1984 cols = aj + ai[r[i]]; 1985 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1986 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1987 nzi += nlnk; 1988 1989 /* make sure diagonal entry is included */ 1990 if (diagonal_fill && lnk[i] == -1) { 1991 fm = n; 1992 while (lnk[fm] < i) fm = lnk[fm]; 1993 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1994 lnk[fm] = i; 1995 lnk_lvl[i] = 0; 1996 nzi++; dcount++; 1997 } 1998 1999 /* add pivot rows into the active row */ 2000 nzbd = 0; 2001 prow = lnk[n]; 2002 while (prow < i) { 2003 nnz = bdiag[prow]; 2004 cols = bj_ptr[prow] + nnz + 1; 2005 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 2006 nnz = bi[prow+1] - bi[prow] - nnz - 1; 2007 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 2008 nzi += nlnk; 2009 prow = lnk[prow]; 2010 nzbd++; 2011 } 2012 bdiag[i] = nzbd; 2013 bi[i+1] = bi[i] + nzi; 2014 2015 /* if free space is not available, make more free space */ 2016 if (current_space->local_remaining<nzi) { 2017 nnz = nzi*(n - i); /* estimated and max additional space needed */ 2018 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 2019 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 2020 reallocs++; 2021 } 2022 2023 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2024 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2025 bj_ptr[i] = current_space->array; 2026 bjlvl_ptr[i] = current_space_lvl->array; 2027 2028 /* make sure the active row i has diagonal entry */ 2029 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 2030 2031 current_space->array += nzi; 2032 current_space->local_used += nzi; 2033 current_space->local_remaining -= nzi; 2034 current_space_lvl->array += nzi; 2035 current_space_lvl->local_used += nzi; 2036 current_space_lvl->local_remaining -= nzi; 2037 } 2038 2039 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2040 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2041 2042 /* destroy list of free space and other temporary arrays */ 2043 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 2044 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */ 2045 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2046 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2047 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 2048 2049 #if defined(PETSC_USE_INFO) 2050 { 2051 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2052 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 2053 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2054 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 2055 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2056 if (diagonal_fill) { 2057 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 2058 } 2059 } 2060 #endif 2061 2062 /* put together the new matrix */ 2063 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 2064 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 2065 b = (Mat_SeqAIJ*)(fact)->data; 2066 2067 b->free_a = PETSC_TRUE; 2068 b->free_ij = PETSC_TRUE; 2069 b->singlemalloc = PETSC_FALSE; 2070 2071 ierr = PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 2072 b->j = bj; 2073 b->i = bi; 2074 for (i=0; i<n; i++) bdiag[i] += bi[i]; 2075 b->diag = bdiag; 2076 b->ilen = 0; 2077 b->imax = 0; 2078 b->row = isrow; 2079 b->col = iscol; 2080 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2081 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2082 b->icol = isicol; 2083 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2084 /* In b structure: Free imax, ilen, old a, old j. 2085 Allocate bdiag, solve_work, new a, new j */ 2086 ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 2087 b->maxnz = b->nz = bi[n]; 2088 2089 (fact)->info.factor_mallocs = reallocs; 2090 (fact)->info.fill_ratio_given = f; 2091 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2092 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2093 if (a->inode.size) { 2094 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2095 } 2096 PetscFunctionReturn(0); 2097 } 2098 2099 #undef __FUNCT__ 2100 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 2101 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 2102 { 2103 Mat C = B; 2104 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2105 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2106 IS ip=b->row,iip = b->icol; 2107 PetscErrorCode ierr; 2108 const PetscInt *rip,*riip; 2109 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 2110 PetscInt *ai=a->i,*aj=a->j; 2111 PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 2112 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2113 PetscBool perm_identity; 2114 FactorShiftCtx sctx; 2115 PetscReal rs; 2116 MatScalar d,*v; 2117 2118 PetscFunctionBegin; 2119 /* MatPivotSetUp(): initialize shift context sctx */ 2120 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2121 2122 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2123 sctx.shift_top = info->zeropivot; 2124 for (i=0; i<mbs; i++) { 2125 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2126 d = (aa)[a->diag[i]]; 2127 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2128 v = aa+ai[i]; 2129 nz = ai[i+1] - ai[i]; 2130 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 2131 if (rs>sctx.shift_top) sctx.shift_top = rs; 2132 } 2133 sctx.shift_top *= 1.1; 2134 sctx.nshift_max = 5; 2135 sctx.shift_lo = 0.; 2136 sctx.shift_hi = 1.; 2137 } 2138 2139 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2140 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2141 2142 /* allocate working arrays 2143 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2144 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2145 */ 2146 ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);CHKERRQ(ierr); 2147 2148 do { 2149 sctx.newshift = PETSC_FALSE; 2150 2151 for (i=0; i<mbs; i++) c2r[i] = mbs; 2152 if (mbs) il[0] = 0; 2153 2154 for (k = 0; k<mbs; k++) { 2155 /* zero rtmp */ 2156 nz = bi[k+1] - bi[k]; 2157 bjtmp = bj + bi[k]; 2158 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2159 2160 /* load in initial unfactored row */ 2161 bval = ba + bi[k]; 2162 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2163 for (j = jmin; j < jmax; j++) { 2164 col = riip[aj[j]]; 2165 if (col >= k) { /* only take upper triangular entry */ 2166 rtmp[col] = aa[j]; 2167 *bval++ = 0.0; /* for in-place factorization */ 2168 } 2169 } 2170 /* shift the diagonal of the matrix: ZeropivotApply() */ 2171 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2172 2173 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2174 dk = rtmp[k]; 2175 i = c2r[k]; /* first row to be added to k_th row */ 2176 2177 while (i < k) { 2178 nexti = c2r[i]; /* next row to be added to k_th row */ 2179 2180 /* compute multiplier, update diag(k) and U(i,k) */ 2181 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2182 uikdi = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 2183 dk += uikdi*ba[ili]; /* update diag[k] */ 2184 ba[ili] = uikdi; /* -U(i,k) */ 2185 2186 /* add multiple of row i to k-th row */ 2187 jmin = ili + 1; jmax = bi[i+1]; 2188 if (jmin < jmax) { 2189 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2190 /* update il and c2r for row i */ 2191 il[i] = jmin; 2192 j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 2193 } 2194 i = nexti; 2195 } 2196 2197 /* copy data into U(k,:) */ 2198 rs = 0.0; 2199 jmin = bi[k]; jmax = bi[k+1]-1; 2200 if (jmin < jmax) { 2201 for (j=jmin; j<jmax; j++) { 2202 col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]); 2203 } 2204 /* add the k-th row into il and c2r */ 2205 il[k] = jmin; 2206 i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 2207 } 2208 2209 /* MatPivotCheck() */ 2210 sctx.rs = rs; 2211 sctx.pv = dk; 2212 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 2213 if (sctx.newshift) break; 2214 dk = sctx.pv; 2215 2216 ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 2217 } 2218 } while (sctx.newshift); 2219 2220 ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr); 2221 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2222 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2223 2224 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2225 if (perm_identity) { 2226 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2227 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2228 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2229 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2230 } else { 2231 B->ops->solve = MatSolve_SeqSBAIJ_1; 2232 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2233 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2234 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2235 } 2236 2237 C->assembled = PETSC_TRUE; 2238 C->preallocated = PETSC_TRUE; 2239 2240 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2241 2242 /* MatPivotView() */ 2243 if (sctx.nshift) { 2244 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2245 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 2246 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2247 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2248 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 2249 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr); 2250 } 2251 } 2252 PetscFunctionReturn(0); 2253 } 2254 2255 #undef __FUNCT__ 2256 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace" 2257 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 2258 { 2259 Mat C = B; 2260 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2261 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2262 IS ip=b->row,iip = b->icol; 2263 PetscErrorCode ierr; 2264 const PetscInt *rip,*riip; 2265 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp; 2266 PetscInt *ai=a->i,*aj=a->j; 2267 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 2268 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2269 PetscBool perm_identity; 2270 FactorShiftCtx sctx; 2271 PetscReal rs; 2272 MatScalar d,*v; 2273 2274 PetscFunctionBegin; 2275 /* MatPivotSetUp(): initialize shift context sctx */ 2276 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2277 2278 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2279 sctx.shift_top = info->zeropivot; 2280 for (i=0; i<mbs; i++) { 2281 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2282 d = (aa)[a->diag[i]]; 2283 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2284 v = aa+ai[i]; 2285 nz = ai[i+1] - ai[i]; 2286 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 2287 if (rs>sctx.shift_top) sctx.shift_top = rs; 2288 } 2289 sctx.shift_top *= 1.1; 2290 sctx.nshift_max = 5; 2291 sctx.shift_lo = 0.; 2292 sctx.shift_hi = 1.; 2293 } 2294 2295 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2296 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2297 2298 /* initialization */ 2299 ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); 2300 2301 do { 2302 sctx.newshift = PETSC_FALSE; 2303 2304 for (i=0; i<mbs; i++) jl[i] = mbs; 2305 il[0] = 0; 2306 2307 for (k = 0; k<mbs; k++) { 2308 /* zero rtmp */ 2309 nz = bi[k+1] - bi[k]; 2310 bjtmp = bj + bi[k]; 2311 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2312 2313 bval = ba + bi[k]; 2314 /* initialize k-th row by the perm[k]-th row of A */ 2315 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2316 for (j = jmin; j < jmax; j++) { 2317 col = riip[aj[j]]; 2318 if (col >= k) { /* only take upper triangular entry */ 2319 rtmp[col] = aa[j]; 2320 *bval++ = 0.0; /* for in-place factorization */ 2321 } 2322 } 2323 /* shift the diagonal of the matrix */ 2324 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2325 2326 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2327 dk = rtmp[k]; 2328 i = jl[k]; /* first row to be added to k_th row */ 2329 2330 while (i < k) { 2331 nexti = jl[i]; /* next row to be added to k_th row */ 2332 2333 /* compute multiplier, update diag(k) and U(i,k) */ 2334 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2335 uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */ 2336 dk += uikdi*ba[ili]; 2337 ba[ili] = uikdi; /* -U(i,k) */ 2338 2339 /* add multiple of row i to k-th row */ 2340 jmin = ili + 1; jmax = bi[i+1]; 2341 if (jmin < jmax) { 2342 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2343 /* update il and jl for row i */ 2344 il[i] = jmin; 2345 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 2346 } 2347 i = nexti; 2348 } 2349 2350 /* shift the diagonals when zero pivot is detected */ 2351 /* compute rs=sum of abs(off-diagonal) */ 2352 rs = 0.0; 2353 jmin = bi[k]+1; 2354 nz = bi[k+1] - jmin; 2355 bcol = bj + jmin; 2356 for (j=0; j<nz; j++) { 2357 rs += PetscAbsScalar(rtmp[bcol[j]]); 2358 } 2359 2360 sctx.rs = rs; 2361 sctx.pv = dk; 2362 ierr = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr); 2363 if (sctx.newshift) break; 2364 dk = sctx.pv; 2365 2366 /* copy data into U(k,:) */ 2367 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 2368 jmin = bi[k]+1; jmax = bi[k+1]; 2369 if (jmin < jmax) { 2370 for (j=jmin; j<jmax; j++) { 2371 col = bj[j]; ba[j] = rtmp[col]; 2372 } 2373 /* add the k-th row into il and jl */ 2374 il[k] = jmin; 2375 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 2376 } 2377 } 2378 } while (sctx.newshift); 2379 2380 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 2381 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2382 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2383 2384 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2385 if (perm_identity) { 2386 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2387 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2388 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2389 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2390 } else { 2391 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2392 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2393 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2394 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2395 } 2396 2397 C->assembled = PETSC_TRUE; 2398 C->preallocated = PETSC_TRUE; 2399 2400 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2401 if (sctx.nshift) { 2402 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2403 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2404 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2405 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2406 } 2407 } 2408 PetscFunctionReturn(0); 2409 } 2410 2411 /* 2412 icc() under revised new data structure. 2413 Factored arrays bj and ba are stored as 2414 U(0,:),...,U(i,:),U(n-1,:) 2415 2416 ui=fact->i is an array of size n+1, in which 2417 ui+ 2418 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2419 ui[n]: points to U(n-1,n-1)+1 2420 2421 udiag=fact->diag is an array of size n,in which 2422 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2423 2424 U(i,:) contains udiag[i] as its last entry, i.e., 2425 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2426 */ 2427 2428 #undef __FUNCT__ 2429 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 2430 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2431 { 2432 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2433 Mat_SeqSBAIJ *b; 2434 PetscErrorCode ierr; 2435 PetscBool perm_identity,missing; 2436 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2437 const PetscInt *rip,*riip; 2438 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2439 PetscInt nlnk,*lnk,*lnk_lvl=NULL,d; 2440 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2441 PetscReal fill =info->fill,levels=info->levels; 2442 PetscFreeSpaceList free_space =NULL,current_space=NULL; 2443 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 2444 PetscBT lnkbt; 2445 IS iperm; 2446 2447 PetscFunctionBegin; 2448 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2449 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2450 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2451 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2452 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2453 2454 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2455 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2456 ui[0] = 0; 2457 2458 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2459 if (!levels && perm_identity) { 2460 for (i=0; i<am; i++) { 2461 ncols = ai[i+1] - a->diag[i]; 2462 ui[i+1] = ui[i] + ncols; 2463 udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 2464 } 2465 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2466 cols = uj; 2467 for (i=0; i<am; i++) { 2468 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2469 ncols = ai[i+1] - a->diag[i] -1; 2470 for (j=0; j<ncols; j++) *cols++ = aj[j]; 2471 *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */ 2472 } 2473 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2474 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2475 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2476 2477 /* initialization */ 2478 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 2479 2480 /* jl: linked list for storing indices of the pivot rows 2481 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2482 ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr); 2483 for (i=0; i<am; i++) { 2484 jl[i] = am; il[i] = 0; 2485 } 2486 2487 /* create and initialize a linked list for storing column indices of the active row k */ 2488 nlnk = am + 1; 2489 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2490 2491 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2492 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2493 current_space = free_space; 2494 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr); 2495 current_space_lvl = free_space_lvl; 2496 2497 for (k=0; k<am; k++) { /* for each active row k */ 2498 /* initialize lnk by the column indices of row rip[k] of A */ 2499 nzk = 0; 2500 ncols = ai[rip[k]+1] - ai[rip[k]]; 2501 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2502 ncols_upper = 0; 2503 for (j=0; j<ncols; j++) { 2504 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2505 if (riip[i] >= k) { /* only take upper triangular entry */ 2506 ajtmp[ncols_upper] = i; 2507 ncols_upper++; 2508 } 2509 } 2510 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2511 nzk += nlnk; 2512 2513 /* update lnk by computing fill-in for each pivot row to be merged in */ 2514 prow = jl[k]; /* 1st pivot row */ 2515 2516 while (prow < k) { 2517 nextprow = jl[prow]; 2518 2519 /* merge prow into k-th row */ 2520 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2521 jmax = ui[prow+1]; 2522 ncols = jmax-jmin; 2523 i = jmin - ui[prow]; 2524 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2525 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2526 j = *(uj - 1); 2527 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2528 nzk += nlnk; 2529 2530 /* update il and jl for prow */ 2531 if (jmin < jmax) { 2532 il[prow] = jmin; 2533 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2534 } 2535 prow = nextprow; 2536 } 2537 2538 /* if free space is not available, make more free space */ 2539 if (current_space->local_remaining<nzk) { 2540 i = am - k + 1; /* num of unfactored rows */ 2541 i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2542 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2543 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2544 reallocs++; 2545 } 2546 2547 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2548 if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2549 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2550 2551 /* add the k-th row into il and jl */ 2552 if (nzk > 1) { 2553 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2554 jl[k] = jl[i]; jl[i] = k; 2555 il[k] = ui[k] + 1; 2556 } 2557 uj_ptr[k] = current_space->array; 2558 uj_lvl_ptr[k] = current_space_lvl->array; 2559 2560 current_space->array += nzk; 2561 current_space->local_used += nzk; 2562 current_space->local_remaining -= nzk; 2563 2564 current_space_lvl->array += nzk; 2565 current_space_lvl->local_used += nzk; 2566 current_space_lvl->local_remaining -= nzk; 2567 2568 ui[k+1] = ui[k] + nzk; 2569 } 2570 2571 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2572 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2573 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2574 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2575 2576 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2577 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2578 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2579 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2580 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2581 2582 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2583 2584 /* put together the new matrix in MATSEQSBAIJ format */ 2585 b = (Mat_SeqSBAIJ*)(fact)->data; 2586 b->singlemalloc = PETSC_FALSE; 2587 2588 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2589 2590 b->j = uj; 2591 b->i = ui; 2592 b->diag = udiag; 2593 b->free_diag = PETSC_TRUE; 2594 b->ilen = 0; 2595 b->imax = 0; 2596 b->row = perm; 2597 b->col = perm; 2598 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2599 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2600 b->icol = iperm; 2601 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2602 2603 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2604 ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2605 2606 b->maxnz = b->nz = ui[am]; 2607 b->free_a = PETSC_TRUE; 2608 b->free_ij = PETSC_TRUE; 2609 2610 fact->info.factor_mallocs = reallocs; 2611 fact->info.fill_ratio_given = fill; 2612 if (ai[am] != 0) { 2613 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2614 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2615 } else { 2616 fact->info.fill_ratio_needed = 0.0; 2617 } 2618 #if defined(PETSC_USE_INFO) 2619 if (ai[am] != 0) { 2620 PetscReal af = fact->info.fill_ratio_needed; 2621 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2622 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2623 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2624 } else { 2625 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2626 } 2627 #endif 2628 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2629 PetscFunctionReturn(0); 2630 } 2631 2632 #undef __FUNCT__ 2633 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace" 2634 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2635 { 2636 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2637 Mat_SeqSBAIJ *b; 2638 PetscErrorCode ierr; 2639 PetscBool perm_identity,missing; 2640 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2641 const PetscInt *rip,*riip; 2642 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2643 PetscInt nlnk,*lnk,*lnk_lvl=NULL,d; 2644 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2645 PetscReal fill =info->fill,levels=info->levels; 2646 PetscFreeSpaceList free_space =NULL,current_space=NULL; 2647 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 2648 PetscBT lnkbt; 2649 IS iperm; 2650 2651 PetscFunctionBegin; 2652 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2653 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2654 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2655 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2656 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2657 2658 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2659 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2660 ui[0] = 0; 2661 2662 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2663 if (!levels && perm_identity) { 2664 2665 for (i=0; i<am; i++) { 2666 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 2667 udiag[i] = ui[i]; 2668 } 2669 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2670 cols = uj; 2671 for (i=0; i<am; i++) { 2672 aj = a->j + a->diag[i]; 2673 ncols = ui[i+1] - ui[i]; 2674 for (j=0; j<ncols; j++) *cols++ = *aj++; 2675 } 2676 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2677 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2678 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2679 2680 /* initialization */ 2681 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 2682 2683 /* jl: linked list for storing indices of the pivot rows 2684 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2685 ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr); 2686 for (i=0; i<am; i++) { 2687 jl[i] = am; il[i] = 0; 2688 } 2689 2690 /* create and initialize a linked list for storing column indices of the active row k */ 2691 nlnk = am + 1; 2692 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2693 2694 /* initial FreeSpace size is fill*(ai[am]+1) */ 2695 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2696 current_space = free_space; 2697 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2698 current_space_lvl = free_space_lvl; 2699 2700 for (k=0; k<am; k++) { /* for each active row k */ 2701 /* initialize lnk by the column indices of row rip[k] of A */ 2702 nzk = 0; 2703 ncols = ai[rip[k]+1] - ai[rip[k]]; 2704 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2705 ncols_upper = 0; 2706 for (j=0; j<ncols; j++) { 2707 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2708 if (riip[i] >= k) { /* only take upper triangular entry */ 2709 ajtmp[ncols_upper] = i; 2710 ncols_upper++; 2711 } 2712 } 2713 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2714 nzk += nlnk; 2715 2716 /* update lnk by computing fill-in for each pivot row to be merged in */ 2717 prow = jl[k]; /* 1st pivot row */ 2718 2719 while (prow < k) { 2720 nextprow = jl[prow]; 2721 2722 /* merge prow into k-th row */ 2723 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2724 jmax = ui[prow+1]; 2725 ncols = jmax-jmin; 2726 i = jmin - ui[prow]; 2727 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2728 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2729 j = *(uj - 1); 2730 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2731 nzk += nlnk; 2732 2733 /* update il and jl for prow */ 2734 if (jmin < jmax) { 2735 il[prow] = jmin; 2736 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2737 } 2738 prow = nextprow; 2739 } 2740 2741 /* if free space is not available, make more free space */ 2742 if (current_space->local_remaining<nzk) { 2743 i = am - k + 1; /* num of unfactored rows */ 2744 i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2745 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2746 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2747 reallocs++; 2748 } 2749 2750 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2751 if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2752 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2753 2754 /* add the k-th row into il and jl */ 2755 if (nzk > 1) { 2756 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2757 jl[k] = jl[i]; jl[i] = k; 2758 il[k] = ui[k] + 1; 2759 } 2760 uj_ptr[k] = current_space->array; 2761 uj_lvl_ptr[k] = current_space_lvl->array; 2762 2763 current_space->array += nzk; 2764 current_space->local_used += nzk; 2765 current_space->local_remaining -= nzk; 2766 2767 current_space_lvl->array += nzk; 2768 current_space_lvl->local_used += nzk; 2769 current_space_lvl->local_remaining -= nzk; 2770 2771 ui[k+1] = ui[k] + nzk; 2772 } 2773 2774 #if defined(PETSC_USE_INFO) 2775 if (ai[am] != 0) { 2776 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2777 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2778 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2779 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2780 } else { 2781 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2782 } 2783 #endif 2784 2785 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2786 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2787 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2788 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2789 2790 /* destroy list of free space and other temporary array(s) */ 2791 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2792 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2793 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2794 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2795 2796 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2797 2798 /* put together the new matrix in MATSEQSBAIJ format */ 2799 2800 b = (Mat_SeqSBAIJ*)fact->data; 2801 b->singlemalloc = PETSC_FALSE; 2802 2803 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2804 2805 b->j = uj; 2806 b->i = ui; 2807 b->diag = udiag; 2808 b->free_diag = PETSC_TRUE; 2809 b->ilen = 0; 2810 b->imax = 0; 2811 b->row = perm; 2812 b->col = perm; 2813 2814 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2815 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2816 2817 b->icol = iperm; 2818 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2819 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2820 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2821 b->maxnz = b->nz = ui[am]; 2822 b->free_a = PETSC_TRUE; 2823 b->free_ij = PETSC_TRUE; 2824 2825 fact->info.factor_mallocs = reallocs; 2826 fact->info.fill_ratio_given = fill; 2827 if (ai[am] != 0) { 2828 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2829 } else { 2830 fact->info.fill_ratio_needed = 0.0; 2831 } 2832 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2833 PetscFunctionReturn(0); 2834 } 2835 2836 #undef __FUNCT__ 2837 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 2838 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2839 { 2840 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2841 Mat_SeqSBAIJ *b; 2842 PetscErrorCode ierr; 2843 PetscBool perm_identity; 2844 PetscReal fill = info->fill; 2845 const PetscInt *rip,*riip; 2846 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2847 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2848 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag; 2849 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2850 PetscBT lnkbt; 2851 IS iperm; 2852 2853 PetscFunctionBegin; 2854 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2855 /* check whether perm is the identity mapping */ 2856 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2857 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2858 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2859 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2860 2861 /* initialization */ 2862 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2863 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2864 ui[0] = 0; 2865 2866 /* jl: linked list for storing indices of the pivot rows 2867 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2868 ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr); 2869 for (i=0; i<am; i++) { 2870 jl[i] = am; il[i] = 0; 2871 } 2872 2873 /* create and initialize a linked list for storing column indices of the active row k */ 2874 nlnk = am + 1; 2875 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2876 2877 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2878 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2879 current_space = free_space; 2880 2881 for (k=0; k<am; k++) { /* for each active row k */ 2882 /* initialize lnk by the column indices of row rip[k] of A */ 2883 nzk = 0; 2884 ncols = ai[rip[k]+1] - ai[rip[k]]; 2885 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2886 ncols_upper = 0; 2887 for (j=0; j<ncols; j++) { 2888 i = riip[*(aj + ai[rip[k]] + j)]; 2889 if (i >= k) { /* only take upper triangular entry */ 2890 cols[ncols_upper] = i; 2891 ncols_upper++; 2892 } 2893 } 2894 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2895 nzk += nlnk; 2896 2897 /* update lnk by computing fill-in for each pivot row to be merged in */ 2898 prow = jl[k]; /* 1st pivot row */ 2899 2900 while (prow < k) { 2901 nextprow = jl[prow]; 2902 /* merge prow into k-th row */ 2903 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2904 jmax = ui[prow+1]; 2905 ncols = jmax-jmin; 2906 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2907 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2908 nzk += nlnk; 2909 2910 /* update il and jl for prow */ 2911 if (jmin < jmax) { 2912 il[prow] = jmin; 2913 j = *uj_ptr; 2914 jl[prow] = jl[j]; 2915 jl[j] = prow; 2916 } 2917 prow = nextprow; 2918 } 2919 2920 /* if free space is not available, make more free space */ 2921 if (current_space->local_remaining<nzk) { 2922 i = am - k + 1; /* num of unfactored rows */ 2923 i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2924 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2925 reallocs++; 2926 } 2927 2928 /* copy data into free space, then initialize lnk */ 2929 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 2930 2931 /* add the k-th row into il and jl */ 2932 if (nzk > 1) { 2933 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2934 jl[k] = jl[i]; jl[i] = k; 2935 il[k] = ui[k] + 1; 2936 } 2937 ui_ptr[k] = current_space->array; 2938 2939 current_space->array += nzk; 2940 current_space->local_used += nzk; 2941 current_space->local_remaining -= nzk; 2942 2943 ui[k+1] = ui[k] + nzk; 2944 } 2945 2946 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2947 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2948 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 2949 2950 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2951 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2952 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2953 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2954 2955 /* put together the new matrix in MATSEQSBAIJ format */ 2956 2957 b = (Mat_SeqSBAIJ*)fact->data; 2958 b->singlemalloc = PETSC_FALSE; 2959 b->free_a = PETSC_TRUE; 2960 b->free_ij = PETSC_TRUE; 2961 2962 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2963 2964 b->j = uj; 2965 b->i = ui; 2966 b->diag = udiag; 2967 b->free_diag = PETSC_TRUE; 2968 b->ilen = 0; 2969 b->imax = 0; 2970 b->row = perm; 2971 b->col = perm; 2972 2973 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2974 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2975 2976 b->icol = iperm; 2977 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2978 2979 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2980 ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2981 2982 b->maxnz = b->nz = ui[am]; 2983 2984 fact->info.factor_mallocs = reallocs; 2985 fact->info.fill_ratio_given = fill; 2986 if (ai[am] != 0) { 2987 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2988 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2989 } else { 2990 fact->info.fill_ratio_needed = 0.0; 2991 } 2992 #if defined(PETSC_USE_INFO) 2993 if (ai[am] != 0) { 2994 PetscReal af = fact->info.fill_ratio_needed; 2995 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2996 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2997 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2998 } else { 2999 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 3000 } 3001 #endif 3002 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 3003 PetscFunctionReturn(0); 3004 } 3005 3006 #undef __FUNCT__ 3007 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace" 3008 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 3009 { 3010 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3011 Mat_SeqSBAIJ *b; 3012 PetscErrorCode ierr; 3013 PetscBool perm_identity; 3014 PetscReal fill = info->fill; 3015 const PetscInt *rip,*riip; 3016 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 3017 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 3018 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 3019 PetscFreeSpaceList free_space=NULL,current_space=NULL; 3020 PetscBT lnkbt; 3021 IS iperm; 3022 3023 PetscFunctionBegin; 3024 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 3025 /* check whether perm is the identity mapping */ 3026 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 3027 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 3028 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 3029 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 3030 3031 /* initialization */ 3032 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 3033 ui[0] = 0; 3034 3035 /* jl: linked list for storing indices of the pivot rows 3036 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 3037 ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr); 3038 for (i=0; i<am; i++) { 3039 jl[i] = am; il[i] = 0; 3040 } 3041 3042 /* create and initialize a linked list for storing column indices of the active row k */ 3043 nlnk = am + 1; 3044 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3045 3046 /* initial FreeSpace size is fill*(ai[am]+1) */ 3047 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 3048 current_space = free_space; 3049 3050 for (k=0; k<am; k++) { /* for each active row k */ 3051 /* initialize lnk by the column indices of row rip[k] of A */ 3052 nzk = 0; 3053 ncols = ai[rip[k]+1] - ai[rip[k]]; 3054 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 3055 ncols_upper = 0; 3056 for (j=0; j<ncols; j++) { 3057 i = riip[*(aj + ai[rip[k]] + j)]; 3058 if (i >= k) { /* only take upper triangular entry */ 3059 cols[ncols_upper] = i; 3060 ncols_upper++; 3061 } 3062 } 3063 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3064 nzk += nlnk; 3065 3066 /* update lnk by computing fill-in for each pivot row to be merged in */ 3067 prow = jl[k]; /* 1st pivot row */ 3068 3069 while (prow < k) { 3070 nextprow = jl[prow]; 3071 /* merge prow into k-th row */ 3072 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3073 jmax = ui[prow+1]; 3074 ncols = jmax-jmin; 3075 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3076 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3077 nzk += nlnk; 3078 3079 /* update il and jl for prow */ 3080 if (jmin < jmax) { 3081 il[prow] = jmin; 3082 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 3083 } 3084 prow = nextprow; 3085 } 3086 3087 /* if free space is not available, make more free space */ 3088 if (current_space->local_remaining<nzk) { 3089 i = am - k + 1; /* num of unfactored rows */ 3090 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3091 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 3092 reallocs++; 3093 } 3094 3095 /* copy data into free space, then initialize lnk */ 3096 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3097 3098 /* add the k-th row into il and jl */ 3099 if (nzk-1 > 0) { 3100 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3101 jl[k] = jl[i]; jl[i] = k; 3102 il[k] = ui[k] + 1; 3103 } 3104 ui_ptr[k] = current_space->array; 3105 3106 current_space->array += nzk; 3107 current_space->local_used += nzk; 3108 current_space->local_remaining -= nzk; 3109 3110 ui[k+1] = ui[k] + nzk; 3111 } 3112 3113 #if defined(PETSC_USE_INFO) 3114 if (ai[am] != 0) { 3115 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 3116 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 3117 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 3118 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 3119 } else { 3120 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 3121 } 3122 #endif 3123 3124 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 3125 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 3126 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 3127 3128 /* destroy list of free space and other temporary array(s) */ 3129 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 3130 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 3131 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3132 3133 /* put together the new matrix in MATSEQSBAIJ format */ 3134 3135 b = (Mat_SeqSBAIJ*)fact->data; 3136 b->singlemalloc = PETSC_FALSE; 3137 b->free_a = PETSC_TRUE; 3138 b->free_ij = PETSC_TRUE; 3139 3140 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 3141 3142 b->j = uj; 3143 b->i = ui; 3144 b->diag = 0; 3145 b->ilen = 0; 3146 b->imax = 0; 3147 b->row = perm; 3148 b->col = perm; 3149 3150 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3151 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3152 3153 b->icol = iperm; 3154 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3155 3156 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3157 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3158 b->maxnz = b->nz = ui[am]; 3159 3160 fact->info.factor_mallocs = reallocs; 3161 fact->info.fill_ratio_given = fill; 3162 if (ai[am] != 0) { 3163 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 3164 } else { 3165 fact->info.fill_ratio_needed = 0.0; 3166 } 3167 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3168 PetscFunctionReturn(0); 3169 } 3170 3171 #undef __FUNCT__ 3172 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 3173 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 3174 { 3175 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3176 PetscErrorCode ierr; 3177 PetscInt n = A->rmap->n; 3178 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 3179 PetscScalar *x,sum; 3180 const PetscScalar *b; 3181 const MatScalar *aa = a->a,*v; 3182 PetscInt i,nz; 3183 3184 PetscFunctionBegin; 3185 if (!n) PetscFunctionReturn(0); 3186 3187 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3188 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3189 3190 /* forward solve the lower triangular */ 3191 x[0] = b[0]; 3192 v = aa; 3193 vi = aj; 3194 for (i=1; i<n; i++) { 3195 nz = ai[i+1] - ai[i]; 3196 sum = b[i]; 3197 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3198 v += nz; 3199 vi += nz; 3200 x[i] = sum; 3201 } 3202 3203 /* backward solve the upper triangular */ 3204 for (i=n-1; i>=0; i--) { 3205 v = aa + adiag[i+1] + 1; 3206 vi = aj + adiag[i+1] + 1; 3207 nz = adiag[i] - adiag[i+1]-1; 3208 sum = x[i]; 3209 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3210 x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3211 } 3212 3213 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 3214 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3215 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3216 PetscFunctionReturn(0); 3217 } 3218 3219 #undef __FUNCT__ 3220 #define __FUNCT__ "MatSolve_SeqAIJ" 3221 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 3222 { 3223 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3224 IS iscol = a->col,isrow = a->row; 3225 PetscErrorCode ierr; 3226 PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz; 3227 const PetscInt *rout,*cout,*r,*c; 3228 PetscScalar *x,*tmp,sum; 3229 const PetscScalar *b; 3230 const MatScalar *aa = a->a,*v; 3231 3232 PetscFunctionBegin; 3233 if (!n) PetscFunctionReturn(0); 3234 3235 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3236 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3237 tmp = a->solve_work; 3238 3239 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 3240 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 3241 3242 /* forward solve the lower triangular */ 3243 tmp[0] = b[r[0]]; 3244 v = aa; 3245 vi = aj; 3246 for (i=1; i<n; i++) { 3247 nz = ai[i+1] - ai[i]; 3248 sum = b[r[i]]; 3249 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3250 tmp[i] = sum; 3251 v += nz; vi += nz; 3252 } 3253 3254 /* backward solve the upper triangular */ 3255 for (i=n-1; i>=0; i--) { 3256 v = aa + adiag[i+1]+1; 3257 vi = aj + adiag[i+1]+1; 3258 nz = adiag[i]-adiag[i+1]-1; 3259 sum = tmp[i]; 3260 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3261 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 3262 } 3263 3264 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 3265 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 3266 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3267 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3268 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 3269 PetscFunctionReturn(0); 3270 } 3271 3272 #undef __FUNCT__ 3273 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 3274 /* 3275 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3276 */ 3277 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 3278 { 3279 Mat B = *fact; 3280 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 3281 IS isicol; 3282 PetscErrorCode ierr; 3283 const PetscInt *r,*ic; 3284 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 3285 PetscInt *bi,*bj,*bdiag,*bdiag_rev; 3286 PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au; 3287 PetscInt nlnk,*lnk; 3288 PetscBT lnkbt; 3289 PetscBool row_identity,icol_identity; 3290 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp; 3291 const PetscInt *ics; 3292 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 3293 PetscReal dt =info->dt,shift=info->shiftamount; 3294 PetscInt dtcount=(PetscInt)info->dtcount,nnz_max; 3295 PetscBool missing; 3296 3297 PetscFunctionBegin; 3298 if (dt == PETSC_DEFAULT) dt = 0.005; 3299 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); 3300 3301 /* ------- symbolic factorization, can be reused ---------*/ 3302 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 3303 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 3304 adiag=a->diag; 3305 3306 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 3307 3308 /* bdiag is location of diagonal in factor */ 3309 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); /* becomes b->diag */ 3310 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev);CHKERRQ(ierr); /* temporary */ 3311 3312 /* allocate row pointers bi */ 3313 ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3314 3315 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3316 if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */ 3317 nnz_max = ai[n]+2*n*dtcount+2; 3318 3319 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3320 ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr); 3321 3322 /* put together the new matrix */ 3323 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 3324 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 3325 b = (Mat_SeqAIJ*)B->data; 3326 3327 b->free_a = PETSC_TRUE; 3328 b->free_ij = PETSC_TRUE; 3329 b->singlemalloc = PETSC_FALSE; 3330 3331 b->a = ba; 3332 b->j = bj; 3333 b->i = bi; 3334 b->diag = bdiag; 3335 b->ilen = 0; 3336 b->imax = 0; 3337 b->row = isrow; 3338 b->col = iscol; 3339 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3340 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3341 b->icol = isicol; 3342 3343 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3344 ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3345 b->maxnz = nnz_max; 3346 3347 B->factortype = MAT_FACTOR_ILUDT; 3348 B->info.factor_mallocs = 0; 3349 B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 3350 /* ------- end of symbolic factorization ---------*/ 3351 3352 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3353 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3354 ics = ic; 3355 3356 /* linked list for storing column indices of the active row */ 3357 nlnk = n + 1; 3358 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3359 3360 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3361 ierr = PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);CHKERRQ(ierr); 3362 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3363 ierr = PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);CHKERRQ(ierr); 3364 ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr); 3365 3366 bi[0] = 0; 3367 bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */ 3368 bdiag_rev[n] = bdiag[0]; 3369 bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */ 3370 for (i=0; i<n; i++) { 3371 /* copy initial fill into linked list */ 3372 nzi = ai[r[i]+1] - ai[r[i]]; 3373 if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 3374 nzi_al = adiag[r[i]] - ai[r[i]]; 3375 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 3376 ajtmp = aj + ai[r[i]]; 3377 ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3378 3379 /* load in initial (unfactored row) */ 3380 aatmp = a->a + ai[r[i]]; 3381 for (j=0; j<nzi; j++) { 3382 rtmp[ics[*ajtmp++]] = *aatmp++; 3383 } 3384 3385 /* add pivot rows into linked list */ 3386 row = lnk[n]; 3387 while (row < i) { 3388 nzi_bl = bi[row+1] - bi[row] + 1; 3389 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 3390 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 3391 nzi += nlnk; 3392 row = lnk[row]; 3393 } 3394 3395 /* copy data from lnk into jtmp, then initialize lnk */ 3396 ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 3397 3398 /* numerical factorization */ 3399 bjtmp = jtmp; 3400 row = *bjtmp++; /* 1st pivot row */ 3401 while (row < i) { 3402 pc = rtmp + row; 3403 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3404 multiplier = (*pc) * (*pv); 3405 *pc = multiplier; 3406 if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 3407 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3408 pv = ba + bdiag[row+1] + 1; 3409 /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */ 3410 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3411 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3412 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3413 } 3414 row = *bjtmp++; 3415 } 3416 3417 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3418 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3419 nzi_bl = 0; j = 0; 3420 while (jtmp[j] < i) { /* Note: jtmp is sorted */ 3421 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3422 nzi_bl++; j++; 3423 } 3424 nzi_bu = nzi - nzi_bl -1; 3425 while (j < nzi) { 3426 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3427 j++; 3428 } 3429 3430 bjtmp = bj + bi[i]; 3431 batmp = ba + bi[i]; 3432 /* apply level dropping rule to L part */ 3433 ncut = nzi_al + dtcount; 3434 if (ncut < nzi_bl) { 3435 ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 3436 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 3437 } else { 3438 ncut = nzi_bl; 3439 } 3440 for (j=0; j<ncut; j++) { 3441 bjtmp[j] = jtmp[j]; 3442 batmp[j] = vtmp[j]; 3443 /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */ 3444 } 3445 bi[i+1] = bi[i] + ncut; 3446 nzi = ncut + 1; 3447 3448 /* apply level dropping rule to U part */ 3449 ncut = nzi_au + dtcount; 3450 if (ncut < nzi_bu) { 3451 ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 3452 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 3453 } else { 3454 ncut = nzi_bu; 3455 } 3456 nzi += ncut; 3457 3458 /* mark bdiagonal */ 3459 bdiag[i+1] = bdiag[i] - (ncut + 1); 3460 bdiag_rev[n-i-1] = bdiag[i+1]; 3461 bi[2*n - i] = bi[2*n - i +1] - (ncut + 1); 3462 bjtmp = bj + bdiag[i]; 3463 batmp = ba + bdiag[i]; 3464 *bjtmp = i; 3465 *batmp = diag_tmp; /* rtmp[i]; */ 3466 if (*batmp == 0.0) { 3467 *batmp = dt+shift; 3468 /* printf(" row %d add shift %g\n",i,shift); */ 3469 } 3470 *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */ 3471 /* printf(" (%d,%g),",*bjtmp,*batmp); */ 3472 3473 bjtmp = bj + bdiag[i+1]+1; 3474 batmp = ba + bdiag[i+1]+1; 3475 for (k=0; k<ncut; k++) { 3476 bjtmp[k] = jtmp[nzi_bl+1+k]; 3477 batmp[k] = vtmp[nzi_bl+1+k]; 3478 /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */ 3479 } 3480 /* printf("\n"); */ 3481 3482 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3483 /* 3484 printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]); 3485 printf(" ----------------------------\n"); 3486 */ 3487 } /* for (i=0; i<n; i++) */ 3488 /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */ 3489 if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 3490 3491 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3492 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3493 3494 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3495 ierr = PetscFree2(im,jtmp);CHKERRQ(ierr); 3496 ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr); 3497 ierr = PetscFree(bdiag_rev);CHKERRQ(ierr); 3498 3499 ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 3500 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3501 3502 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3503 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 3504 if (row_identity && icol_identity) { 3505 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3506 } else { 3507 B->ops->solve = MatSolve_SeqAIJ; 3508 } 3509 3510 B->ops->solveadd = 0; 3511 B->ops->solvetranspose = 0; 3512 B->ops->solvetransposeadd = 0; 3513 B->ops->matsolve = 0; 3514 B->assembled = PETSC_TRUE; 3515 B->preallocated = PETSC_TRUE; 3516 PetscFunctionReturn(0); 3517 } 3518 3519 /* a wraper of MatILUDTFactor_SeqAIJ() */ 3520 #undef __FUNCT__ 3521 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ" 3522 /* 3523 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3524 */ 3525 3526 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 3527 { 3528 PetscErrorCode ierr; 3529 3530 PetscFunctionBegin; 3531 ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr); 3532 PetscFunctionReturn(0); 3533 } 3534 3535 /* 3536 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3537 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3538 */ 3539 #undef __FUNCT__ 3540 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ" 3541 /* 3542 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3543 */ 3544 3545 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 3546 { 3547 Mat C =fact; 3548 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 3549 IS isrow = b->row,isicol = b->icol; 3550 PetscErrorCode ierr; 3551 const PetscInt *r,*ic,*ics; 3552 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 3553 PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 3554 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 3555 PetscReal dt=info->dt,shift=info->shiftamount; 3556 PetscBool row_identity, col_identity; 3557 3558 PetscFunctionBegin; 3559 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3560 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3561 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 3562 ics = ic; 3563 3564 for (i=0; i<n; i++) { 3565 /* initialize rtmp array */ 3566 nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 3567 bjtmp = bj + bi[i]; 3568 for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 3569 rtmp[i] = 0.0; 3570 nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 3571 bjtmp = bj + bdiag[i+1] + 1; 3572 for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 3573 3574 /* load in initial unfactored row of A */ 3575 /* printf("row %d\n",i); */ 3576 nz = ai[r[i]+1] - ai[r[i]]; 3577 ajtmp = aj + ai[r[i]]; 3578 v = aa + ai[r[i]]; 3579 for (j=0; j<nz; j++) { 3580 rtmp[ics[*ajtmp++]] = v[j]; 3581 /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */ 3582 } 3583 /* printf("\n"); */ 3584 3585 /* numerical factorization */ 3586 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3587 nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 3588 k = 0; 3589 while (k < nzl) { 3590 row = *bjtmp++; 3591 /* printf(" prow %d\n",row); */ 3592 pc = rtmp + row; 3593 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3594 multiplier = (*pc) * (*pv); 3595 *pc = multiplier; 3596 if (PetscAbsScalar(multiplier) > dt) { 3597 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3598 pv = b->a + bdiag[row+1] + 1; 3599 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3600 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3601 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3602 } 3603 k++; 3604 } 3605 3606 /* finished row so stick it into b->a */ 3607 /* L-part */ 3608 pv = b->a + bi[i]; 3609 pj = bj + bi[i]; 3610 nzl = bi[i+1] - bi[i]; 3611 for (j=0; j<nzl; j++) { 3612 pv[j] = rtmp[pj[j]]; 3613 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3614 } 3615 3616 /* diagonal: invert diagonal entries for simplier triangular solves */ 3617 if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 3618 b->a[bdiag[i]] = 1.0/rtmp[i]; 3619 /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */ 3620 3621 /* U-part */ 3622 pv = b->a + bdiag[i+1] + 1; 3623 pj = bj + bdiag[i+1] + 1; 3624 nzu = bdiag[i] - bdiag[i+1] - 1; 3625 for (j=0; j<nzu; j++) { 3626 pv[j] = rtmp[pj[j]]; 3627 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3628 } 3629 /* printf("\n"); */ 3630 } 3631 3632 ierr = PetscFree(rtmp);CHKERRQ(ierr); 3633 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3634 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3635 3636 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3637 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 3638 if (row_identity && col_identity) { 3639 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3640 } else { 3641 C->ops->solve = MatSolve_SeqAIJ; 3642 } 3643 C->ops->solveadd = 0; 3644 C->ops->solvetranspose = 0; 3645 C->ops->solvetransposeadd = 0; 3646 C->ops->matsolve = 0; 3647 C->assembled = PETSC_TRUE; 3648 C->preallocated = PETSC_TRUE; 3649 3650 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 3651 PetscFunctionReturn(0); 3652 } 3653