1 #include "petsc.h" 2 #include "../src/mat/impls/aij/seq/aij.h" 3 #include "spbas.h" 4 5 /* 6 spbas_memory_requirement: 7 Calculate the number of bytes needed to store tha matrix 8 */ 9 #undef __FUNCT__ 10 #define __FUNCT__ "spbas_memory_requirement" 11 long int spbas_memory_requirement( spbas_matrix matrix) 12 { 13 long int memreq = 6 * sizeof(PetscInt) + /* nrows, ncols, nnz, n_alloc_icol, 14 n_alloc_val, col_idx_type */ 15 sizeof(PetscTruth) + /* block_data */ 16 sizeof(PetscScalar**) + /* values */ 17 sizeof(PetscScalar*) + /* alloc_val */ 18 2 * sizeof(int**) + /* icols, icols0 */ 19 2 * sizeof(PetscInt*) + /* row_nnz, alloc_icol */ 20 matrix.nrows * sizeof(PetscInt) + /* row_nnz[*] */ 21 matrix.nrows * sizeof(PetscInt*); /* icols[*] */ 22 23 /* icol0[*] */ 24 if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) 25 { memreq += matrix.nrows * sizeof(PetscInt); } 26 27 /* icols[*][*] */ 28 if (matrix.block_data) 29 { memreq += matrix.n_alloc_icol * sizeof(PetscInt); } 30 else 31 { memreq += matrix.nnz * sizeof(PetscInt); } 32 33 if (matrix.values) 34 { 35 memreq += matrix.nrows * sizeof(PetscScalar*); /* values[*] */ 36 /* values[*][*] */ 37 if (matrix.block_data) 38 { memreq += matrix.n_alloc_val * sizeof(PetscScalar); } 39 else 40 { memreq += matrix.nnz * sizeof(PetscScalar); } 41 } 42 43 return memreq; 44 } 45 46 /* 47 spbas_allocate_pattern: 48 allocate the pattern arrays row_nnz, icols and optionally values 49 */ 50 #undef __FUNCT__ 51 #define __FUNCT__ "spbas_allocate_pattern" 52 PetscErrorCode spbas_allocate_pattern( spbas_matrix * result, PetscTruth do_values) 53 { 54 PetscErrorCode ierr; 55 PetscInt nrows = result->nrows; 56 PetscInt col_idx_type = result->col_idx_type; 57 58 PetscFunctionBegin; 59 /* Allocate sparseness pattern */ 60 ierr = PetscMalloc(nrows*sizeof(PetscInt),&result->row_nnz);CHKERRQ(ierr); 61 ierr = PetscMalloc(nrows*sizeof(PetscInt*),&result->icols);CHKERRQ(ierr); 62 63 /* If offsets are given wrt an array, create array */ 64 if (col_idx_type == SPBAS_OFFSET_ARRAY) 65 { 66 ierr = PetscMalloc(nrows*sizeof(PetscInt),&result->icol0);CHKERRQ(ierr); 67 } 68 else 69 { 70 result->icol0 = PETSC_NULL; 71 } 72 73 /* If values are given, allocate values array */ 74 if (do_values) 75 { 76 ierr = PetscMalloc(nrows*sizeof(PetscScalar*),&result->values);CHKERRQ(ierr); 77 } 78 else 79 { 80 result->values = PETSC_NULL; 81 } 82 83 PetscFunctionReturn(0); 84 } 85 86 /* 87 spbas_allocate_data: 88 in case of block_data: 89 Allocate the data arrays alloc_icol and optionally alloc_val, 90 set appropriate pointers from icols and values; 91 in case of !block_data: 92 Allocate the arrays icols[i] and optionally values[i] 93 */ 94 #undef __FUNCT__ 95 #define __FUNCT__ "spbas_allocate_data" 96 PetscErrorCode spbas_allocate_data( spbas_matrix * result) 97 { 98 PetscInt i; 99 PetscInt nnz = result->nnz; 100 PetscInt nrows = result->nrows; 101 PetscInt r_nnz; 102 PetscErrorCode ierr; 103 PetscTruth do_values = (result->values != PETSC_NULL) ? PETSC_TRUE : PETSC_FALSE; 104 PetscTruth block_data = result->block_data; 105 106 PetscFunctionBegin; 107 if (block_data) 108 { 109 /* Allocate the column number array and point to it */ 110 result->n_alloc_icol = nnz; 111 ierr = PetscMalloc(nnz*sizeof(PetscInt), &result->alloc_icol);CHKERRQ(ierr); 112 result->icols[0]= result->alloc_icol; 113 for (i=1; i<nrows; i++) 114 { 115 result->icols[i] = result->icols[i-1] + result->row_nnz[i-1]; 116 } 117 118 /* Allocate the value array and point to it */ 119 if (do_values) 120 { 121 result->n_alloc_val= nnz; 122 ierr = PetscMalloc(nnz*sizeof(PetscScalar), &result->alloc_val);CHKERRQ(ierr); 123 result->values[0]= result->alloc_val; 124 for (i=1; i<nrows; i++) 125 { 126 result->values[i] = result->values[i-1] + result->row_nnz[i-1]; 127 } 128 } 129 } 130 else 131 { 132 for (i=0; i<nrows; i++) 133 { 134 r_nnz = result->row_nnz[i]; 135 ierr = PetscMalloc(r_nnz*sizeof(PetscInt), &result->icols[i]);CHKERRQ(ierr); 136 } 137 if (do_values) 138 { 139 for (i=0; i<nrows; i++) 140 { 141 r_nnz = result->row_nnz[i]; 142 ierr = PetscMalloc(r_nnz*sizeof(PetscScalar), 143 &result->values[i]);CHKERRQ(ierr); 144 } 145 } 146 } 147 148 PetscFunctionReturn(0); 149 } 150 151 /* 152 spbas_row_order_icol 153 determine if row i1 should come 154 + before row i2 in the sorted rows (return -1), 155 + after (return 1) 156 + is identical (return 0). 157 */ 158 #undef __FUNCT__ 159 #define __FUNCT__ "spbas_row_order_icol" 160 int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in,PetscInt col_idx_type) 161 { 162 PetscInt j; 163 PetscInt nnz1 = irow_in[i1+1] - irow_in[i1]; 164 PetscInt nnz2 = irow_in[i2+1] - irow_in[i2]; 165 166 PetscInt * icol1 = &icol_in[irow_in[i1]]; 167 PetscInt * icol2 = &icol_in[irow_in[i2]]; 168 169 if (nnz1<nnz2) {return -1;} 170 if (nnz1>nnz2) {return 1;} 171 172 if (col_idx_type == SPBAS_COLUMN_NUMBERS) 173 { 174 for (j=0; j<nnz1; j++) 175 { 176 if (icol1[j]< icol2[j]) {return -1;} 177 if (icol1[j]> icol2[j]) {return 1;} 178 } 179 } 180 else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) 181 { 182 for (j=0; j<nnz1; j++) 183 { 184 if (icol1[j]-i1< icol2[j]-i2) {return -1;} 185 if (icol1[j]-i1> icol2[j]-i2) {return 1;} 186 } 187 } 188 else if (col_idx_type == SPBAS_OFFSET_ARRAY) 189 { 190 for (j=1; j<nnz1; j++) 191 { 192 if (icol1[j]-icol1[0] < icol2[j]-icol2[0]) {return -1;} 193 if (icol1[j]-icol1[0] > icol2[j]-icol2[0]) {return 1;} 194 } 195 } 196 return 0; 197 } 198 199 /* 200 spbas_mergesort_icols: 201 return a sorting of the rows in which identical sparseness patterns are 202 next to each other 203 */ 204 #undef __FUNCT__ 205 #define __FUNCT__ "spbas_mergesort_icols" 206 PetscErrorCode spbas_mergesort_icols( PetscInt nrows, PetscInt * irow_in, PetscInt * icol_in,PetscInt col_idx_type, PetscInt *isort) 207 { 208 PetscErrorCode ierr; 209 PetscInt istep; /* Chunk-sizes of already sorted parts of arrays */ 210 211 PetscInt i, i1, i2; /* Loop counters for (partly) sorted arrays */ 212 213 PetscInt istart, i1end, i2end; /* start of newly sorted array part, end of both parts */ 214 215 PetscInt *ialloc; /* Allocated arrays */ 216 PetscInt *iswap; /* auxiliary pointers for swapping */ 217 PetscInt *ihlp1; /* Pointers to new version of arrays, */ 218 PetscInt *ihlp2; /* Pointers to previous version of arrays, */ 219 220 PetscFunctionBegin; 221 222 ierr = PetscMalloc(nrows*sizeof(PetscInt),&ialloc);CHKERRQ(ierr); 223 224 ihlp1 = ialloc; 225 ihlp2 = isort; 226 227 /* Sorted array chunks are first 1 long, and increase until they are the complete array */ 228 for (istep=1; istep<nrows; istep*=2) 229 { 230 /* 231 Combine sorted parts 232 istart:istart+istep-1 and istart+istep-1:istart+2*istep-1 233 of ihlp2 and vhlp2 234 235 into one sorted part 236 istart:istart+2*istep-1 237 of ihlp1 and vhlp1 238 */ 239 for (istart=0; istart<nrows; istart+=2*istep) 240 { 241 242 /* Set counters and bound array part endings */ 243 i1=istart; i1end = i1+istep; if (i1end>nrows) {i1end=nrows;} 244 i2=istart+istep; i2end = i2+istep; if (i2end>nrows) {i2end=nrows;} 245 246 /* Merge the two array parts */ 247 for (i=istart; i<i2end; i++) 248 { 249 if (i1<i1end && i2<i2end && 250 spbas_row_order_icol( ihlp2[i1], ihlp2[i2], 251 irow_in, icol_in, col_idx_type) < 0) 252 { 253 ihlp1[i] = ihlp2[i1]; 254 i1++; 255 } 256 else if (i2<i2end ) 257 { 258 ihlp1[i] = ihlp2[i2]; 259 i2++; 260 } 261 else 262 { 263 ihlp1[i] = ihlp2[i1]; 264 i1++; 265 } 266 } 267 } 268 269 /* Swap the two array sets */ 270 iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap; 271 } 272 273 /* Copy one more time in case the sorted arrays are the temporary ones */ 274 if (ihlp2 != isort) 275 { 276 for (i=0; i<nrows; i++) { isort[i] = ihlp2[i]; } 277 } 278 279 ierr = PetscFree(ialloc);CHKERRQ(ierr); 280 PetscFunctionReturn(0); 281 } 282 283 284 285 /* 286 spbas_compress_pattern: 287 calculate a compressed sparseness pattern for a sparseness pattern 288 given in compressed row storage. The compressed sparseness pattern may 289 require (much) less memory. 290 */ 291 #undef __FUNCT__ 292 #define __FUNCT__ "spbas_compress_pattern" 293 PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols, 294 PetscInt col_idx_type, spbas_matrix *B,PetscReal *mem_reduction) 295 { 296 PetscInt nnz = irow_in[nrows]; 297 long int mem_orig = (nrows + nnz) * sizeof(PetscInt); 298 long int mem_compressed; 299 PetscErrorCode ierr; 300 PetscInt *isort; 301 PetscInt *icols; 302 PetscInt row_nnz; 303 PetscInt *ipoint; 304 PetscTruth *used; 305 PetscInt ptr; 306 PetscInt i,j; 307 308 const PetscTruth no_values = PETSC_FALSE; 309 310 PetscFunctionBegin; 311 312 /* Allocate the structure of the new matrix */ 313 B->nrows = nrows; 314 B->ncols = ncols; 315 B->nnz = nnz; 316 B->col_idx_type= col_idx_type; 317 B->block_data = PETSC_TRUE; 318 ierr = spbas_allocate_pattern( B, no_values);CHKERRQ(ierr); 319 320 /* When using an offset array, set it */ 321 if (col_idx_type==SPBAS_OFFSET_ARRAY) 322 { 323 for (i=0; i<nrows; i++) {B->icol0[i] = icol_in[irow_in[i]];} 324 } 325 326 /* Allocate the ordering for the rows */ 327 ierr = PetscMalloc(nrows*sizeof(PetscInt),&isort);CHKERRQ(ierr); 328 ierr = PetscMalloc(nrows*sizeof(PetscInt),&ipoint);CHKERRQ(ierr); 329 ierr = PetscMalloc(nrows*sizeof(PetscTruth),&used);CHKERRQ(ierr); 330 331 /* Initialize the sorting */ 332 memset((void*) used, 0, nrows*sizeof(PetscTruth)); 333 for (i = 0; i<nrows; i++) 334 { 335 B->row_nnz[i] = irow_in[i+1]-irow_in[i]; 336 isort[i] = i; 337 ipoint[i]= i; 338 } 339 340 /* Sort the rows so that identical columns will be next to each other */ 341 ierr = spbas_mergesort_icols( nrows, irow_in, icol_in, col_idx_type, isort);CHKERRQ(ierr); 342 ierr = PetscInfo(PETSC_NULL,"Rows have been sorted for patterns\n");CHKERRQ(ierr); 343 344 /* Replace identical rows with the first one in the list */ 345 for (i=1; i<nrows; i++) 346 { 347 if (spbas_row_order_icol(isort[i-1], isort[i], 348 irow_in, icol_in, col_idx_type) == 0) 349 { 350 ipoint[isort[i]] = ipoint[isort[i-1]]; 351 } 352 } 353 354 /* Collect the rows which are used*/ 355 for (i=0; i<nrows; i++) {used[ipoint[i]] = PETSC_TRUE;} 356 357 /* Calculate needed memory */ 358 B->n_alloc_icol = 0; 359 for (i=0; i<nrows; i++) 360 { 361 if (used[i]) {B->n_alloc_icol += B->row_nnz[i];} 362 } 363 364 ierr = PetscMalloc(B->n_alloc_icol*sizeof(PetscInt),&B->alloc_icol);CHKERRQ(ierr); 365 366 /* Fill in the diagonal offsets for the rows which store their own data */ 367 ptr = 0; 368 for (i=0; i<B->nrows; i++) 369 { 370 if (used[i]) 371 { 372 B->icols[i] = &B->alloc_icol[ptr]; 373 icols = &icol_in[irow_in[i]]; 374 row_nnz = B->row_nnz[i]; 375 if (col_idx_type == SPBAS_COLUMN_NUMBERS) 376 { 377 for (j=0; j<row_nnz; j++) 378 { 379 B->icols[i][j] = icols[j]; 380 } 381 } 382 else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) 383 { 384 for (j=0; j<row_nnz; j++) 385 { 386 B->icols[i][j] = icols[j]-i; 387 } 388 } 389 else if (col_idx_type == SPBAS_OFFSET_ARRAY) 390 { 391 for (j=0; j<row_nnz; j++) 392 { 393 B->icols[i][j] = icols[j]-icols[0]; 394 } 395 } 396 ptr += B->row_nnz[i]; 397 } 398 } 399 400 /* Point to the right places for all data */ 401 for (i=0; i<nrows; i++) 402 { 403 B->icols[i] = B->icols[ipoint[i]]; 404 } 405 ierr = PetscInfo(PETSC_NULL,"Row patterns have been compressed\n");CHKERRQ(ierr); 406 ierr = PetscInfo1(PETSC_NULL," (%G nonzeros per row)\n", (PetscReal) nnz / (PetscReal) nrows);CHKERRQ(ierr); 407 408 ierr=PetscFree(isort);CHKERRQ(ierr); 409 ierr=PetscFree(used);CHKERRQ(ierr); 410 ierr=PetscFree(ipoint);CHKERRQ(ierr); 411 412 mem_compressed = spbas_memory_requirement( *B ); 413 *mem_reduction = 100.0 * (PetscReal)(mem_orig-mem_compressed)/ 414 (PetscReal) mem_orig; 415 PetscFunctionReturn(0); 416 } 417 418 /* 419 spbas_incomplete_cholesky 420 Incomplete Cholesky decomposition 421 */ 422 #include "spbas_cholesky.h" 423 424 /* 425 spbas_delete : de-allocate the arrays owned by this matrix 426 */ 427 #undef __FUNCT__ 428 #define __FUNCT__ "spbas_delete" 429 PetscErrorCode spbas_delete(spbas_matrix matrix) 430 { 431 PetscInt i; 432 PetscErrorCode ierr; 433 PetscFunctionBegin; 434 if (matrix.block_data) 435 { 436 ierr=PetscFree(matrix.alloc_icol);CHKERRQ(ierr) 437 if (matrix.values){ierr=PetscFree(matrix.alloc_val);CHKERRQ(ierr);} 438 } 439 else 440 { 441 for (i=0; i<matrix.nrows; i++) 442 { ierr=PetscFree(matrix.icols[i]);CHKERRQ(ierr);} 443 ierr = PetscFree(matrix.icols);CHKERRQ(ierr); 444 if (matrix.values) 445 { 446 for (i=0; i<matrix.nrows; i++) 447 { ierr=PetscFree(matrix.values[i]);CHKERRQ(ierr);} 448 } 449 } 450 451 ierr=PetscFree(matrix.row_nnz);CHKERRQ(ierr); 452 ierr=PetscFree(matrix.icols);CHKERRQ(ierr); 453 if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) 454 {ierr=PetscFree(matrix.icol0);CHKERRQ(ierr);} 455 if (matrix.values) 456 {ierr=PetscFree(matrix.values);CHKERRQ(ierr);} 457 458 PetscFunctionReturn(0); 459 } 460 461 /* 462 spbas_matrix_to_crs: 463 Convert an spbas_matrix to compessed row storage 464 */ 465 #undef __FUNCT__ 466 #define __FUNCT__ "spbas_matrix_to_crs" 467 PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A,MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out) 468 { 469 PetscInt nrows = matrix_A.nrows; 470 PetscInt nnz = matrix_A.nnz; 471 PetscInt i,j,r_nnz,i0; 472 PetscInt *irow; 473 PetscInt *icol; 474 PetscInt *icol_A; 475 MatScalar *val; 476 PetscScalar *val_A; 477 PetscInt col_idx_type = matrix_A.col_idx_type; 478 PetscTruth do_values = matrix_A.values ? PETSC_TRUE : PETSC_FALSE; 479 PetscErrorCode ierr; 480 481 PetscFunctionBegin; 482 ierr = PetscMalloc( sizeof(PetscInt) * (nrows+1), &irow);CHKERRQ(ierr); 483 ierr = PetscMalloc( sizeof(PetscInt) * nnz, &icol);CHKERRQ(ierr); 484 *icol_out = icol; *irow_out=irow; 485 if (do_values) 486 { 487 ierr = PetscMalloc( sizeof(MatScalar) * nnz, &val);CHKERRQ(ierr); 488 *val_out = val; *icol_out = icol; *irow_out=irow; 489 } 490 491 irow[0]=0; 492 for (i=0; i<nrows; i++) 493 { 494 r_nnz = matrix_A.row_nnz[i]; 495 i0 = irow[i]; 496 irow[i+1] = i0 + r_nnz; 497 icol_A = matrix_A.icols[i]; 498 499 if (do_values) 500 { 501 val_A = matrix_A.values[i]; 502 for (j=0; j<r_nnz; j++) 503 { 504 icol[i0+j] = icol_A[j]; 505 val[i0+j] = val_A[j]; 506 } 507 } 508 else 509 { 510 for (j=0; j<r_nnz; j++) { icol[i0+j] = icol_A[j]; } 511 } 512 513 if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) 514 { 515 for (j=0; j<r_nnz; j++) { icol[i0+j] += i; } 516 } 517 else if (col_idx_type == SPBAS_OFFSET_ARRAY) 518 { 519 i0 = matrix_A.icol0[i]; 520 for (j=0; j<r_nnz; j++) { icol[i0+j] += i0; } 521 } 522 523 } 524 525 PetscFunctionReturn(0); 526 } 527 528 529 /* 530 spbas_transpose 531 return the transpose of a matrix 532 */ 533 #undef __FUNCT__ 534 #define __FUNCT__ "spbas_transpose" 535 PetscErrorCode spbas_transpose( spbas_matrix in_matrix, spbas_matrix * result) 536 { 537 PetscInt col_idx_type = in_matrix.col_idx_type; 538 PetscInt nnz = in_matrix.nnz; 539 PetscInt ncols = in_matrix.nrows; 540 PetscInt nrows = in_matrix.ncols; 541 PetscInt i,j,k; 542 PetscInt r_nnz; 543 PetscInt *irow; 544 PetscInt icol0; 545 PetscScalar * val; 546 PetscErrorCode ierr; 547 548 PetscFunctionBegin; 549 550 /* Copy input values */ 551 result->nrows = nrows; 552 result->ncols = ncols; 553 result->nnz = nnz; 554 result->col_idx_type = SPBAS_COLUMN_NUMBERS; 555 result->block_data = PETSC_TRUE; 556 557 /* Allocate sparseness pattern */ 558 ierr = spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr); 559 560 /* Count the number of nonzeros in each row */ 561 for (i = 0; i<nrows; i++) { result->row_nnz[i] = 0; } 562 563 for (i=0; i<ncols; i++) 564 { 565 r_nnz = in_matrix.row_nnz[i]; 566 irow = in_matrix.icols[i]; 567 if (col_idx_type == SPBAS_COLUMN_NUMBERS) 568 { 569 for (j=0; j<r_nnz; j++) { result->row_nnz[irow[j]]++; } 570 } 571 else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) 572 { 573 for (j=0; j<r_nnz; j++) { result->row_nnz[i+irow[j]]++; } 574 } 575 else if (col_idx_type == SPBAS_OFFSET_ARRAY) 576 { 577 icol0=in_matrix.icol0[i]; 578 for (j=0; j<r_nnz; j++) { result->row_nnz[icol0+irow[j]]++; } 579 } 580 } 581 582 /* Set the pointers to the data */ 583 ierr = spbas_allocate_data(result);CHKERRQ(ierr); 584 585 /* Reset the number of nonzeros in each row */ 586 for (i = 0; i<nrows; i++) { result->row_nnz[i] = 0; } 587 588 /* Fill the data arrays */ 589 if (in_matrix.values) 590 { 591 for (i=0; i<ncols; i++) 592 { 593 r_nnz = in_matrix.row_nnz[i]; 594 irow = in_matrix.icols[i]; 595 val = in_matrix.values[i]; 596 597 if (col_idx_type == SPBAS_COLUMN_NUMBERS) {icol0=0;} 598 else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {icol0=i;} 599 else if (col_idx_type == SPBAS_OFFSET_ARRAY) 600 {icol0=in_matrix.icol0[i];} 601 for (j=0; j<r_nnz; j++) 602 { 603 k = icol0 + irow[j]; 604 result->icols[k][result->row_nnz[k]] = i; 605 result->values[k][result->row_nnz[k]] = val[j]; 606 result->row_nnz[k]++; 607 } 608 } 609 } 610 else 611 { 612 for (i=0; i<ncols; i++) 613 { 614 r_nnz = in_matrix.row_nnz[i]; 615 irow = in_matrix.icols[i]; 616 617 if (col_idx_type == SPBAS_COLUMN_NUMBERS) {icol0=0;} 618 else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {icol0=i;} 619 else if (col_idx_type == SPBAS_OFFSET_ARRAY) 620 {icol0=in_matrix.icol0[i];} 621 622 for (j=0; j<r_nnz; j++) 623 { 624 k = icol0 + irow[j]; 625 result->icols[k][result->row_nnz[k]] = i; 626 result->row_nnz[k]++; 627 } 628 } 629 } 630 PetscFunctionReturn(0); 631 } 632 633 /* 634 spbas_mergesort 635 636 mergesort for an array of intergers and an array of associated 637 reals 638 639 on output, icol[0..nnz-1] is increasing; 640 val[0..nnz-1] has undergone the same permutation as icol 641 642 NB: val may be PETSC_NULL: in that case, only the integers are sorted 643 644 */ 645 #undef __FUNCT__ 646 #define __FUNCT__ "spbas_mergesort" 647 PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val) 648 { 649 PetscInt istep; /* Chunk-sizes of already sorted parts of arrays */ 650 PetscInt i, i1, i2; /* Loop counters for (partly) sorted arrays */ 651 PetscInt istart, i1end, i2end; /* start of newly sorted array part, end of both parts */ 652 PetscInt *ialloc; /* Allocated arrays */ 653 PetscScalar *valloc=PETSC_NULL; 654 PetscInt *iswap; /* auxiliary pointers for swapping */ 655 PetscScalar *vswap; 656 PetscInt *ihlp1; /* Pointers to new version of arrays, */ 657 PetscScalar *vhlp1=PETSC_NULL; /* (arrays under construction) */ 658 PetscInt *ihlp2; /* Pointers to previous version of arrays, */ 659 PetscScalar *vhlp2=PETSC_NULL; 660 PetscErrorCode ierr; 661 662 ierr = PetscMalloc(nnz*sizeof(PetscInt),&ialloc);CHKERRQ(ierr); 663 ihlp1 = ialloc; 664 ihlp2 = icol; 665 666 if (val) 667 { 668 ierr = PetscMalloc(nnz*sizeof(PetscScalar),&valloc);CHKERRQ(ierr); 669 vhlp1 = valloc; 670 vhlp2 = val; 671 } 672 673 674 /* Sorted array chunks are first 1 long, and increase until they are the complete array */ 675 for (istep=1; istep<nnz; istep*=2) 676 { 677 /* 678 Combine sorted parts 679 istart:istart+istep-1 and istart+istep-1:istart+2*istep-1 680 of ihlp2 and vhlp2 681 682 into one sorted part 683 istart:istart+2*istep-1 684 of ihlp1 and vhlp1 685 */ 686 for (istart=0; istart<nnz; istart+=2*istep) 687 { 688 689 /* Set counters and bound array part endings */ 690 i1=istart; i1end = i1+istep; if (i1end>nnz) {i1end=nnz;} 691 i2=istart+istep; i2end = i2+istep; if (i2end>nnz) {i2end=nnz;} 692 693 /* Merge the two array parts */ 694 if (val) 695 { 696 for (i=istart; i<i2end; i++) 697 { 698 if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) 699 { 700 ihlp1[i] = ihlp2[i1]; 701 vhlp1[i] = vhlp2[i1]; 702 i1++; 703 } 704 else if (i2<i2end ) 705 { 706 ihlp1[i] = ihlp2[i2]; 707 vhlp1[i] = vhlp2[i2]; 708 i2++; 709 } 710 else 711 { 712 ihlp1[i] = ihlp2[i1]; 713 vhlp1[i] = vhlp2[i1]; 714 i1++; 715 } 716 } 717 } 718 else 719 { 720 for (i=istart; i<i2end; i++) 721 { 722 if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) 723 { 724 ihlp1[i] = ihlp2[i1]; 725 i1++; 726 } 727 else if (i2<i2end ) 728 { 729 ihlp1[i] = ihlp2[i2]; 730 i2++; 731 } 732 else 733 { 734 ihlp1[i] = ihlp2[i1]; 735 i1++; 736 } 737 } 738 } 739 } 740 741 /* Swap the two array sets */ 742 iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap; 743 vswap = vhlp2; vhlp2 = vhlp1; vhlp1 = vswap; 744 } 745 746 /* Copy one more time in case the sorted arrays are the temporary ones */ 747 if (ihlp2 != icol) 748 { 749 for (i=0; i<nnz; i++) { icol[i] = ihlp2[i]; } 750 if (val) 751 { 752 for (i=0; i<nnz; i++) { val[i] = vhlp2[i]; } 753 } 754 } 755 756 ierr = PetscFree(ialloc);CHKERRQ(ierr); 757 if(val){ierr = PetscFree(valloc);CHKERRQ(ierr);} 758 PetscFunctionReturn(0); 759 } 760 761 /* 762 spbas_apply_reordering_rows: 763 apply the given reordering to the rows: matrix_A = matrix_A(perm,:); 764 */ 765 #undef __FUNCT__ 766 #define __FUNCT__ "spbas_apply_reordering_rows" 767 PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation) 768 { 769 PetscInt i,j,ip; 770 PetscInt nrows=matrix_A->nrows; 771 PetscInt * row_nnz; 772 PetscInt **icols; 773 PetscTruth do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE; 774 PetscScalar **vals=PETSC_NULL; 775 PetscErrorCode ierr; 776 777 PetscFunctionBegin; 778 if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) 779 { 780 SETERRQ( PETSC_ERR_SUP_SYS, 781 "must have diagonal offsets in pattern\n"); 782 } 783 784 if (do_values) 785 { 786 ierr = PetscMalloc( sizeof(PetscScalar*)*nrows, &vals);CHKERRQ(ierr); 787 } 788 ierr = PetscMalloc( sizeof(PetscInt)*nrows, &row_nnz);CHKERRQ(ierr); 789 ierr = PetscMalloc( sizeof(PetscInt*)*nrows, &icols);CHKERRQ(ierr); 790 791 for (i=0; i<nrows;i++) 792 { 793 ip = permutation[i]; 794 if (do_values) {vals[i] = matrix_A->values[ip];} 795 icols[i] = matrix_A->icols[ip]; 796 row_nnz[i] = matrix_A->row_nnz[ip]; 797 for (j=0; j<row_nnz[i]; j++) { icols[i][j] += ip-i; } 798 } 799 800 if (do_values){ ierr = PetscFree(matrix_A->values);CHKERRQ(ierr);} 801 ierr = PetscFree(matrix_A->icols);CHKERRQ(ierr); 802 ierr = PetscFree(matrix_A->row_nnz);CHKERRQ(ierr); 803 804 if (do_values) { matrix_A->values = vals; } 805 matrix_A->icols = icols; 806 matrix_A->row_nnz = row_nnz; 807 808 PetscFunctionReturn(0); 809 } 810 811 812 /* 813 spbas_apply_reordering_cols: 814 apply the given reordering to the columns: matrix_A(:,perm) = matrix_A; 815 */ 816 #undef __FUNCT__ 817 #define __FUNCT__ "spbas_apply_reordering_cols" 818 PetscErrorCode spbas_apply_reordering_cols( spbas_matrix *matrix_A,const PetscInt *permutation) 819 { 820 PetscInt i,j; 821 PetscInt nrows=matrix_A->nrows; 822 PetscInt row_nnz; 823 PetscInt *icols; 824 PetscTruth do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE; 825 PetscScalar *vals=PETSC_NULL; 826 827 PetscErrorCode ierr; 828 829 PetscFunctionBegin; 830 831 if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) 832 { 833 SETERRQ( PETSC_ERR_SUP_SYS, 834 "must have diagonal offsets in pattern\n"); 835 } 836 837 for (i=0; i<nrows;i++) 838 { 839 icols = matrix_A->icols[i]; 840 row_nnz = matrix_A->row_nnz[i]; 841 if (do_values) { vals = matrix_A->values[i]; } 842 843 for (j=0; j<row_nnz; j++) 844 { 845 icols[j] = permutation[i+icols[j]]-i; 846 } 847 ierr = spbas_mergesort(row_nnz, icols, vals);CHKERRQ(ierr); 848 } 849 850 PetscFunctionReturn(0); 851 } 852 853 /* 854 spbas_apply_reordering: 855 apply the given reordering: matrix_A(perm,perm) = matrix_A; 856 */ 857 #undef __FUNCT__ 858 #define __FUNCT__ "spbas_apply_reordering" 859 PetscErrorCode spbas_apply_reordering( spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt * inv_perm) 860 { 861 PetscErrorCode ierr; 862 PetscFunctionBegin; 863 ierr = spbas_apply_reordering_rows( matrix_A, inv_perm);CHKERRQ(ierr); 864 ierr = spbas_apply_reordering_cols( matrix_A, permutation);CHKERRQ(ierr); 865 PetscFunctionReturn(0); 866 } 867 868 #undef __FUNCT__ 869 #define __FUNCT__ "spbas_pattern_only" 870 PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix * result) 871 { 872 spbas_matrix retval; 873 PetscInt i, j, i0, r_nnz; 874 PetscErrorCode ierr; 875 876 PetscFunctionBegin; 877 878 /* Copy input values */ 879 retval.nrows = nrows; 880 retval.ncols = ncols; 881 retval.nnz = ai[nrows]; 882 883 retval.block_data = PETSC_TRUE; 884 retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS; 885 886 /* Allocate output matrix */ 887 ierr = spbas_allocate_pattern(&retval, PETSC_FALSE);CHKERRQ(ierr); 888 for (i=0; i<nrows; i++) {retval.row_nnz[i] = ai[i+1]-ai[i];} 889 ierr = spbas_allocate_data(&retval);CHKERRQ(ierr); 890 /* Copy the structure */ 891 for (i = 0; i<retval.nrows; i++) 892 { 893 i0 = ai[i]; 894 r_nnz = ai[i+1]-i0; 895 896 for (j=0; j<r_nnz; j++) 897 { 898 retval.icols[i][j] = aj[i0+j]-i; 899 } 900 } 901 902 *result = retval; 903 PetscFunctionReturn(0); 904 } 905 906 907 /* 908 spbas_mark_row_power: 909 Mark the columns in row 'row' which are nonzero in 910 matrix^2log(marker). 911 */ 912 #undef __FUNCT__ 913 #define __FUNCT__ "spbas_mark_row_power" 914 PetscErrorCode spbas_mark_row_power( 915 PetscInt *iwork, /* marker-vector */ 916 PetscInt row, /* row for which the columns are marked */ 917 spbas_matrix * in_matrix, /* matrix for which the power is being calculated */ 918 PetscInt marker, /* marker-value: 2^power */ 919 PetscInt minmrk, /* lower bound for marked points */ 920 PetscInt maxmrk) /* upper bound for marked points */ 921 { 922 PetscErrorCode ierr; 923 PetscInt i,j, nnz; 924 925 PetscFunctionBegin; 926 nnz = in_matrix->row_nnz[row]; 927 928 /* For higher powers, call this function recursively */ 929 if (marker>1) 930 { 931 for (i=0; i<nnz;i++) 932 { 933 j = row + in_matrix->icols[row][i]; 934 if (minmrk<=j && j<maxmrk && iwork[j] < marker ) 935 { 936 ierr = spbas_mark_row_power( iwork, row + in_matrix->icols[row][i],in_matrix, marker/2,minmrk,maxmrk);CHKERRQ(ierr); 937 iwork[j] |= marker; 938 } 939 } 940 } 941 else 942 { 943 /* Mark the columns reached */ 944 for (i=0; i<nnz;i++) 945 { 946 j = row + in_matrix->icols[row][i]; 947 if (minmrk<=j && j<maxmrk) { iwork[j] |= 1; } 948 } 949 } 950 951 PetscFunctionReturn(0); 952 } 953 954 955 /* 956 spbas_power 957 Calculate sparseness patterns for incomplete Cholesky decompositions 958 of a given order: (almost) all nonzeros of the matrix^(order+1) which 959 are inside the band width are found and stored in the output sparseness 960 pattern. 961 */ 962 #undef __FUNCT__ 963 #define __FUNCT__ "spbas_power" 964 PetscErrorCode spbas_power (spbas_matrix in_matrix,PetscInt power, spbas_matrix * result) 965 { 966 spbas_matrix retval; 967 PetscInt nrows = in_matrix.nrows; 968 PetscInt ncols = in_matrix.ncols; 969 PetscInt i, j, kend; 970 PetscInt nnz, inz; 971 PetscInt *iwork; 972 PetscInt marker; 973 PetscInt maxmrk=0; 974 PetscErrorCode ierr; 975 976 PetscFunctionBegin; 977 if (in_matrix.col_idx_type != SPBAS_DIAGONAL_OFFSETS) 978 { 979 SETERRQ( PETSC_ERR_SUP_SYS, 980 "must have diagonal offsets in pattern\n"); 981 } 982 983 if (ncols != nrows) 984 { 985 SETERRQ( PETSC_ERR_ARG_INCOMP, 986 "Dimension error\n"); 987 } 988 989 if (in_matrix.values) 990 { 991 SETERRQ( PETSC_ERR_ARG_INCOMP, 992 "Input array must be sparseness pattern (no values)"); 993 } 994 995 if (power<=0) 996 { 997 SETERRQ( PETSC_ERR_SUP_SYS, 998 "Power must be 1 or up"); 999 } 1000 1001 /* Copy input values*/ 1002 retval.nrows = ncols; 1003 retval.ncols = nrows; 1004 retval.nnz = 0; 1005 retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS; 1006 retval.block_data = PETSC_FALSE; 1007 1008 /* Allocate sparseness pattern */ 1009 ierr = spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr); 1010 1011 /* Allocate marker array */ 1012 ierr = PetscMalloc(nrows * sizeof(PetscInt), &iwork);CHKERRQ(ierr); 1013 1014 /* Erase the pattern for this row */ 1015 memset( (void *) iwork, 0, retval.nrows*sizeof(PetscInt)); 1016 1017 /* Calculate marker values */ 1018 marker = 1; for (i=1; i<power; i++) {marker*=2;} 1019 1020 for (i=0; i<nrows; i++) 1021 { 1022 /* Calculate the pattern for each row */ 1023 1024 nnz = in_matrix.row_nnz[i]; 1025 kend = i+in_matrix.icols[i][nnz-1]; 1026 if (maxmrk<=kend) {maxmrk=kend+1;} 1027 ierr = spbas_mark_row_power( iwork, i, &in_matrix, marker, i, maxmrk);CHKERRQ(ierr); 1028 1029 /* Count the columns*/ 1030 nnz = 0; 1031 for (j=i; j<maxmrk; j++) {nnz+= (iwork[j]!=0);} 1032 1033 /* Allocate the column indices */ 1034 retval.row_nnz[i] = nnz; 1035 ierr = PetscMalloc(nnz*sizeof(PetscInt),&retval.icols[i]);CHKERRQ(ierr); 1036 1037 /* Administrate the column indices */ 1038 inz = 0; 1039 for (j=i; j<maxmrk; j++) 1040 { 1041 if (iwork[j]) 1042 { 1043 retval.icols[i][inz] = j-i; 1044 inz++; 1045 iwork[j]=0; 1046 } 1047 } 1048 retval.nnz += nnz; 1049 }; 1050 1051 ierr = PetscFree(iwork);CHKERRQ(ierr); 1052 1053 *result = retval; 1054 PetscFunctionReturn(0); 1055 } 1056 1057 1058 1059 /* 1060 spbas_keep_upper: 1061 remove the lower part of the matrix: keep the upper part 1062 */ 1063 #undef __FUNCT__ 1064 #define __FUNCT__ "spbas_keep_upper" 1065 PetscErrorCode spbas_keep_upper( spbas_matrix * inout_matrix) 1066 { 1067 PetscInt i, j; 1068 PetscInt jstart; 1069 1070 PetscFunctionBegin; 1071 if (inout_matrix->block_data) 1072 { 1073 SETERRQ( PETSC_ERR_SUP_SYS, 1074 "Not yet for block data matrices\n"); 1075 } 1076 1077 for (i=0; i<inout_matrix->nrows; i++) 1078 { 1079 for (jstart=0; 1080 (jstart<inout_matrix->row_nnz[i]) && 1081 (inout_matrix->icols[i][jstart]<0); jstart++){} 1082 1083 if (jstart>0) 1084 { 1085 for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) 1086 { 1087 inout_matrix->icols[i][j] = 1088 inout_matrix->icols[i][j+jstart]; 1089 } 1090 1091 if (inout_matrix->values) 1092 { 1093 for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) 1094 { 1095 inout_matrix->values[i][j] = 1096 inout_matrix->values[i][j+jstart]; 1097 } 1098 } 1099 1100 inout_matrix->row_nnz[i] -= jstart; 1101 1102 inout_matrix->icols[i] = (PetscInt*) realloc( 1103 (void*) inout_matrix->icols[i], 1104 inout_matrix->row_nnz[i]*sizeof(PetscInt)); 1105 1106 if (inout_matrix->values) 1107 { 1108 inout_matrix->values[i] = (PetscScalar*) realloc( 1109 (void*) inout_matrix->values[i], 1110 inout_matrix->row_nnz[i]*sizeof(PetscScalar)); 1111 } 1112 inout_matrix->nnz -= jstart; 1113 } 1114 } 1115 PetscFunctionReturn(0); 1116 } 1117 1118