1 #include "petsc.h" 2 #include "../src/mat/impls/aij/seq/aij.h" 3 #include "spbas.h" 4 5 6 7 8 /* 9 spbas_memory_requirement: 10 Calculate the number of bytes needed to store tha matrix 11 */ 12 #undef __FUNCT__ 13 #define __FUNCT__ "spbas_memory_requirement" 14 long int spbas_memory_requirement( spbas_matrix matrix) 15 { 16 17 18 // Requirement for 19 long int memreq = 6 * sizeof(PetscInt) + // nrows, ncols, nnz, n_alloc_icol, 20 // n_alloc_val, col_idx_type 21 sizeof(PetscTruth) + // block_data 22 sizeof(PetscScalar**) + // values 23 sizeof(PetscScalar*) + // alloc_val 24 2 * sizeof(int**) + // icols, icols0 25 2 * sizeof(PetscInt*) + // row_nnz, alloc_icol 26 matrix.nrows * sizeof(PetscInt) + // row_nnz[*] 27 matrix.nrows * sizeof(PetscInt*); // icols[*] 28 29 // icol0[*] 30 if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) 31 { memreq += matrix.nrows * sizeof(PetscInt); } 32 33 // icols[*][*] 34 if (matrix.block_data) 35 { memreq += matrix.n_alloc_icol * sizeof(PetscInt); } 36 else 37 { memreq += matrix.nnz * sizeof(PetscInt); } 38 39 if (matrix.values) 40 { 41 memreq += matrix.nrows * sizeof(PetscScalar*); // values[*] 42 // values[*][*] 43 if (matrix.block_data) 44 { memreq += matrix.n_alloc_val * sizeof(PetscScalar); } 45 else 46 { memreq += matrix.nnz * sizeof(PetscScalar); } 47 } 48 49 return memreq; 50 } 51 52 /* 53 spbas_allocate_pattern: 54 allocate the pattern arrays row_nnz, icols and optionally values 55 */ 56 #undef __FUNCT__ 57 #define __FUNCT__ "spbas_allocate_pattern" 58 PetscErrorCode spbas_allocate_pattern( spbas_matrix * result, PetscTruth do_values) 59 { 60 PetscErrorCode ierr; 61 PetscInt nrows = result->nrows; 62 PetscInt col_idx_type = result->col_idx_type; 63 64 PetscFunctionBegin; 65 // Allocate sparseness pattern 66 ierr = PetscMalloc(nrows*sizeof(PetscInt),&result->row_nnz); CHKERRQ(ierr); 67 ierr = PetscMalloc(nrows*sizeof(PetscInt*),&result->icols); CHKERRQ(ierr); 68 69 // If offsets are given wrt an array, create array 70 if (col_idx_type == SPBAS_OFFSET_ARRAY) 71 { 72 ierr = PetscMalloc(nrows*sizeof(PetscInt),&result->icol0); CHKERRQ(ierr); 73 } 74 else 75 { 76 result->icol0 = NULL; 77 } 78 79 // If values are given, allocate values array 80 if (do_values) 81 { 82 ierr = PetscMalloc(nrows*sizeof(PetscScalar*),&result->values); 83 CHKERRQ(ierr); 84 } 85 else 86 { 87 result->values = NULL; 88 } 89 90 PetscFunctionReturn(0); 91 } 92 93 /* 94 spbas_allocate_data: 95 in case of block_data: 96 Allocate the data arrays alloc_icol and optionally alloc_val, 97 set appropriate pointers from icols and values; 98 in case of !block_data: 99 Allocate the arrays icols[i] and optionally values[i] 100 */ 101 #undef __FUNCT__ 102 #define __FUNCT__ "spbas_allocate_data" 103 PetscErrorCode spbas_allocate_data( spbas_matrix * result) 104 { 105 PetscInt i; 106 PetscInt nnz = result->nnz; 107 PetscInt nrows = result->nrows; 108 PetscInt r_nnz; 109 PetscErrorCode ierr; 110 PetscTruth do_values = (result->values != NULL) ? PETSC_TRUE : PETSC_FALSE; 111 PetscTruth block_data = result->block_data; 112 113 PetscFunctionBegin; 114 if (block_data) 115 { 116 // Allocate the column number array and point to it 117 result->n_alloc_icol = nnz; 118 ierr = PetscMalloc(nnz*sizeof(PetscInt), &result->alloc_icol); 119 CHKERRQ(ierr); 120 result->icols[0]= result->alloc_icol; 121 for (i=1; i<nrows; i++) 122 { 123 result->icols[i] = result->icols[i-1] + result->row_nnz[i-1]; 124 } 125 126 // Allocate the value array and point to it 127 if (do_values) 128 { 129 result->n_alloc_val= nnz; 130 ierr = PetscMalloc(nnz*sizeof(PetscScalar), &result->alloc_val); 131 CHKERRQ(ierr); 132 result->values[0]= result->alloc_val; 133 for (i=1; i<nrows; i++) 134 { 135 result->values[i] = result->values[i-1] + result->row_nnz[i-1]; 136 } 137 } 138 } 139 else 140 { 141 for (i=0; i<nrows; i++) 142 { 143 r_nnz = result->row_nnz[i]; 144 ierr = PetscMalloc(r_nnz*sizeof(PetscInt), &result->icols[i]); 145 CHKERRQ(ierr); 146 } 147 if (do_values) 148 { 149 for (i=0; i<nrows; i++) 150 { 151 r_nnz = result->row_nnz[i]; 152 ierr = PetscMalloc(r_nnz*sizeof(PetscScalar), 153 &result->values[i]); CHKERRQ(ierr); 154 } 155 } 156 } 157 158 PetscFunctionReturn(0); 159 } 160 161 /* 162 spbas_row_order_icol 163 determine if row i1 should come 164 + before row i2 in the sorted rows (return -1), 165 + after (return 1) 166 + is identical (return 0). 167 */ 168 #undef __FUNCT__ 169 #define __FUNCT__ "spbas_row_order_icol" 170 int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in,PetscInt col_idx_type) 171 { 172 PetscInt j; 173 PetscInt nnz1 = irow_in[i1+1] - irow_in[i1]; 174 PetscInt nnz2 = irow_in[i2+1] - irow_in[i2]; 175 176 PetscInt * icol1 = &icol_in[irow_in[i1]]; 177 PetscInt * icol2 = &icol_in[irow_in[i2]]; 178 179 if (nnz1<nnz2) {return -1;} 180 if (nnz1>nnz2) {return 1;} 181 182 if (col_idx_type == SPBAS_COLUMN_NUMBERS) 183 { 184 for (j=0; j<nnz1; j++) 185 { 186 if (icol1[j]< icol2[j]) {return -1;} 187 if (icol1[j]> icol2[j]) {return 1;} 188 } 189 } 190 else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) 191 { 192 for (j=0; j<nnz1; j++) 193 { 194 if (icol1[j]-i1< icol2[j]-i2) {return -1;} 195 if (icol1[j]-i1> icol2[j]-i2) {return 1;} 196 } 197 } 198 else if (col_idx_type == SPBAS_OFFSET_ARRAY) 199 { 200 for (j=1; j<nnz1; j++) 201 { 202 if (icol1[j]-icol1[0] < icol2[j]-icol2[0]) {return -1;} 203 if (icol1[j]-icol1[0] > icol2[j]-icol2[0]) {return 1;} 204 } 205 } 206 return 0; 207 } 208 209 /* 210 spbas_mergesort_icols: 211 return a sorting of the rows in which identical sparseness patterns are 212 next to each other 213 */ 214 #undef __FUNCT__ 215 #define __FUNCT__ "spbas_mergesort_icols" 216 PetscErrorCode spbas_mergesort_icols( PetscInt nrows, PetscInt * irow_in, PetscInt * icol_in,PetscInt col_idx_type, PetscInt *isort) 217 { 218 PetscErrorCode ierr; 219 PetscInt istep; // Chunk-sizes of already sorted parts of arrays 220 221 PetscInt i, i1, i2; // Loop counters for (partly) sorted arrays 222 223 PetscInt istart, i1end, i2end; // start of newly sorted array part, end of both 224 // parts 225 226 PetscInt *ialloc; // Allocated arrays 227 PetscInt *iswap; // auxiliary pointers for swapping 228 PetscInt *ihlp1; // Pointers to new version of arrays, 229 PetscInt *ihlp2; // Pointers to previous version of arrays, 230 231 PetscFunctionBegin; 232 233 // Create arrays 234 ierr = PetscMalloc(nrows*sizeof(PetscInt),&ialloc); CHKERRQ(ierr); 235 236 ihlp1 = ialloc; 237 ihlp2 = isort; 238 239 // Sorted array chunks are first 1 long, and increase until they are the 240 // complete array 241 for (istep=1; istep<nrows; istep*=2) 242 { 243 // 244 // Combine sorted parts 245 // istart:istart+istep-1 and istart+istep-1:istart+2*istep-1 246 // of ihlp2 and vhlp2 247 // 248 // into one sorted part 249 // istart:istart+2*istep-1 250 // of ihlp1 and vhlp1 251 // 252 for (istart=0; istart<nrows; istart+=2*istep) 253 { 254 255 // Set counters and bound array part endings 256 i1=istart; i1end = i1+istep; if (i1end>nrows) {i1end=nrows;} 257 i2=istart+istep; i2end = i2+istep; if (i2end>nrows) {i2end=nrows;} 258 259 // Merge the two array parts 260 for (i=istart; i<i2end; i++) 261 { 262 if (i1<i1end && i2<i2end && 263 spbas_row_order_icol( ihlp2[i1], ihlp2[i2], 264 irow_in, icol_in, col_idx_type) < 0) 265 { 266 ihlp1[i] = ihlp2[i1]; 267 i1++; 268 } 269 else if (i2<i2end ) 270 { 271 ihlp1[i] = ihlp2[i2]; 272 i2++; 273 } 274 else 275 { 276 ihlp1[i] = ihlp2[i1]; 277 i1++; 278 } 279 } 280 } 281 282 // Swap the two array sets 283 iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap; 284 } 285 286 // Copy one more time in case the sorted arrays are the temporary ones 287 if (ihlp2 != isort) 288 { 289 for (i=0; i<nrows; i++) { isort[i] = ihlp2[i]; } 290 } 291 292 // Remove help arrays 293 ierr = PetscFree(ialloc); CHKERRQ(ierr); 294 PetscFunctionReturn(0); 295 } 296 297 298 299 /* 300 spbas_compress_pattern: 301 calculate a compressed sparseness pattern for a sparseness pattern 302 given in compressed row storage. The compressed sparseness pattern may 303 require (much) less memory. 304 */ 305 #undef __FUNCT__ 306 #define __FUNCT__ "spbas_compress_pattern" 307 PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols, 308 PetscInt col_idx_type, spbas_matrix *B,PetscReal *mem_reduction) 309 { 310 PetscInt nnz = irow_in[nrows]; 311 long int mem_orig = (nrows + nnz) * sizeof(PetscInt); 312 long int mem_compressed; 313 PetscErrorCode ierr; 314 PetscInt *isort; 315 PetscInt *icols; 316 PetscInt row_nnz; 317 PetscInt *ipoint; 318 PetscTruth *used; 319 PetscInt ptr; 320 PetscInt i,j; 321 322 const PetscTruth no_values = PETSC_FALSE; 323 324 PetscFunctionBegin; 325 326 // Allocate the structure of the new matrix 327 B->nrows = nrows; 328 B->ncols = ncols; 329 B->nnz = nnz; 330 B->col_idx_type= col_idx_type; 331 B->block_data = PETSC_TRUE; 332 ierr = spbas_allocate_pattern( B, no_values); CHKERRQ(ierr); 333 334 // When using an offset array, set it 335 if (col_idx_type==SPBAS_OFFSET_ARRAY) 336 { 337 for (i=0; i<nrows; i++) {B->icol0[i] = icol_in[irow_in[i]];} 338 } 339 340 // Allocate the ordering for the rows 341 ierr = PetscMalloc(nrows*sizeof(PetscInt),&isort); CHKERRQ(ierr); 342 ierr = PetscMalloc(nrows*sizeof(PetscInt),&ipoint); CHKERRQ(ierr); 343 ierr = PetscMalloc(nrows*sizeof(PetscTruth),&used); CHKERRQ(ierr); 344 345 // Initialize the sorting 346 memset((void*) used, 0, nrows*sizeof(PetscTruth)); 347 for (i = 0; i<nrows; i++) 348 { 349 B->row_nnz[i] = irow_in[i+1]-irow_in[i]; 350 isort[i] = i; 351 ipoint[i]= i; 352 } 353 354 // Sort the rows so that identical columns will be next to each other 355 ierr = spbas_mergesort_icols( nrows, irow_in, icol_in, col_idx_type, isort); CHKERRQ(ierr); 356 printf("Rows have been sorted for patterns\n"); 357 358 // Replace identical rows with the first one in the list 359 for (i=1; i<nrows; i++) 360 { 361 if (spbas_row_order_icol(isort[i-1], isort[i], 362 irow_in, icol_in, col_idx_type) == 0) 363 { 364 ipoint[isort[i]] = ipoint[isort[i-1]]; 365 } 366 } 367 368 // Collect the rows which are used 369 for (i=0; i<nrows; i++) {used[ipoint[i]] = PETSC_TRUE;} 370 371 // Calculate needed memory 372 B->n_alloc_icol = 0; 373 for (i=0; i<nrows; i++) 374 { 375 if (used[i]) {B->n_alloc_icol += B->row_nnz[i];} 376 } 377 378 ierr = PetscMalloc(B->n_alloc_icol*sizeof(PetscInt),&B->alloc_icol); 379 CHKERRQ(ierr); 380 381 // Fill in the diagonal offsets for the rows which store their own data 382 ptr = 0; 383 for (i=0; i<B->nrows; i++) 384 { 385 if (used[i]) 386 { 387 B->icols[i] = &B->alloc_icol[ptr]; 388 icols = &icol_in[irow_in[i]]; 389 row_nnz = B->row_nnz[i]; 390 if (col_idx_type == SPBAS_COLUMN_NUMBERS) 391 { 392 for (j=0; j<row_nnz; j++) 393 { 394 B->icols[i][j] = icols[j]; 395 } 396 } 397 else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) 398 { 399 for (j=0; j<row_nnz; j++) 400 { 401 B->icols[i][j] = icols[j]-i; 402 } 403 } 404 else if (col_idx_type == SPBAS_OFFSET_ARRAY) 405 { 406 for (j=0; j<row_nnz; j++) 407 { 408 B->icols[i][j] = icols[j]-icols[0]; 409 } 410 } 411 ptr += B->row_nnz[i]; 412 } 413 } 414 415 // Point to the right places for all data 416 for (i=0; i<nrows; i++) 417 { 418 B->icols[i] = B->icols[ipoint[i]]; 419 } 420 printf("Row patterns have been compressed\n"); 421 printf(" (%6.2f nonzeros per row)\n", (PetscReal) nnz / (PetscReal) nrows); 422 423 424 // Clean up 425 ierr=PetscFree(isort); CHKERRQ(ierr); 426 ierr=PetscFree(used); CHKERRQ(ierr); 427 ierr=PetscFree(ipoint); CHKERRQ(ierr); 428 429 mem_compressed = spbas_memory_requirement( *B ); 430 *mem_reduction = 100.0 * (PetscReal)(mem_orig-mem_compressed)/ 431 (PetscReal) mem_orig; 432 PetscFunctionReturn(0); 433 } 434 435 /* 436 spbas_incomplete_cholesky 437 Incomplete Cholesky decomposition 438 */ 439 #include "spbas_cholesky.h" 440 441 /* 442 spbas_delete : de-allocate the arrays owned by this matrix 443 */ 444 #undef __FUNCT__ 445 #define __FUNCT__ "spbas_delete" 446 PetscErrorCode spbas_delete(spbas_matrix matrix) 447 { 448 PetscInt i; 449 PetscErrorCode ierr; 450 PetscFunctionBegin; 451 if (matrix.block_data) 452 { 453 ierr=PetscFree(matrix.alloc_icol); CHKERRQ(ierr) 454 if (matrix.values){ierr=PetscFree(matrix.alloc_val);CHKERRQ(ierr);} 455 } 456 else 457 { 458 for (i=0; i<matrix.nrows; i++) 459 { ierr=PetscFree(matrix.icols[i]); CHKERRQ(ierr);} 460 ierr = PetscFree(matrix.icols);CHKERRQ(ierr); 461 if (matrix.values) 462 { 463 for (i=0; i<matrix.nrows; i++) 464 { ierr=PetscFree(matrix.values[i]); CHKERRQ(ierr);} 465 } 466 } 467 468 ierr=PetscFree(matrix.row_nnz); CHKERRQ(ierr); 469 ierr=PetscFree(matrix.icols); CHKERRQ(ierr); 470 if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) 471 {ierr=PetscFree(matrix.icol0);CHKERRQ(ierr);} 472 if (matrix.values) 473 {ierr=PetscFree(matrix.values);CHKERRQ(ierr);} 474 475 PetscFunctionReturn(0); 476 } 477 478 /* 479 spbas_matrix_to_crs: 480 Convert an spbas_matrix to compessed row storage 481 */ 482 #undef __FUNCT__ 483 #define __FUNCT__ "spbas_matrix_to_crs" 484 PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A,MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out) 485 { 486 PetscInt nrows = matrix_A.nrows; 487 PetscInt nnz = matrix_A.nnz; 488 PetscInt i,j,r_nnz,i0; 489 PetscInt *irow; 490 PetscInt *icol; 491 PetscInt *icol_A; 492 MatScalar *val; 493 PetscScalar *val_A; 494 PetscInt col_idx_type = matrix_A.col_idx_type; 495 PetscTruth do_values = matrix_A.values ? PETSC_TRUE : PETSC_FALSE; 496 PetscErrorCode ierr; 497 498 PetscFunctionBegin; 499 ierr = PetscMalloc( sizeof(PetscInt) * (nrows+1), &irow); CHKERRQ(ierr); 500 ierr = PetscMalloc( sizeof(PetscInt) * nnz, &icol); CHKERRQ(ierr); 501 *icol_out = icol; *irow_out=irow; 502 if (do_values) 503 { 504 ierr = PetscMalloc( sizeof(MatScalar) * nnz, &val); CHKERRQ(ierr); 505 *val_out = val; *icol_out = icol; *irow_out=irow; 506 } 507 508 irow[0]=0; 509 for (i=0; i<nrows; i++) 510 { 511 r_nnz = matrix_A.row_nnz[i]; 512 i0 = irow[i]; 513 irow[i+1] = i0 + r_nnz; 514 icol_A = matrix_A.icols[i]; 515 516 if (do_values) 517 { 518 val_A = matrix_A.values[i]; 519 for (j=0; j<r_nnz; j++) 520 { 521 icol[i0+j] = icol_A[j]; 522 val[i0+j] = val_A[j]; 523 } 524 } 525 else 526 { 527 for (j=0; j<r_nnz; j++) { icol[i0+j] = icol_A[j]; } 528 } 529 530 if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) 531 { 532 for (j=0; j<r_nnz; j++) { icol[i0+j] += i; } 533 } 534 else if (col_idx_type == SPBAS_OFFSET_ARRAY) 535 { 536 i0 = matrix_A.icol0[i]; 537 for (j=0; j<r_nnz; j++) { icol[i0+j] += i0; } 538 } 539 540 } 541 542 PetscFunctionReturn(0); 543 } 544 545 546 /* 547 spbas_transpose 548 return the transpose of a matrix 549 */ 550 #undef __FUNCT__ 551 #define __FUNCT__ "spbas_transpose" 552 PetscErrorCode spbas_transpose( spbas_matrix in_matrix, spbas_matrix * result) 553 { 554 PetscInt col_idx_type = in_matrix.col_idx_type; 555 PetscInt nnz = in_matrix.nnz; 556 PetscInt ncols = in_matrix.nrows; 557 PetscInt nrows = in_matrix.ncols; 558 PetscInt i,j,k; 559 PetscInt r_nnz; 560 PetscInt *irow; 561 PetscInt icol0; 562 PetscScalar * val; 563 PetscErrorCode ierr; 564 565 PetscFunctionBegin; 566 567 // Copy input values 568 result->nrows = nrows; 569 result->ncols = ncols; 570 result->nnz = nnz; 571 result->col_idx_type = SPBAS_COLUMN_NUMBERS; 572 result->block_data = PETSC_TRUE; 573 574 // Allocate sparseness pattern 575 ierr = spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE); 576 CHKERRQ(ierr); 577 578 // Count the number of nonzeros in each row 579 for (i = 0; i<nrows; i++) { result->row_nnz[i] = 0; } 580 581 for (i=0; i<ncols; i++) 582 { 583 r_nnz = in_matrix.row_nnz[i]; 584 irow = in_matrix.icols[i]; 585 if (col_idx_type == SPBAS_COLUMN_NUMBERS) 586 { 587 for (j=0; j<r_nnz; j++) { result->row_nnz[irow[j]]++; } 588 } 589 else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) 590 { 591 for (j=0; j<r_nnz; j++) { result->row_nnz[i+irow[j]]++; } 592 } 593 else if (col_idx_type == SPBAS_OFFSET_ARRAY) 594 { 595 icol0=in_matrix.icol0[i]; 596 for (j=0; j<r_nnz; j++) { result->row_nnz[icol0+irow[j]]++; } 597 } 598 } 599 600 // Set the pointers to the data 601 ierr = spbas_allocate_data(result); CHKERRQ(ierr); 602 603 // Reset the number of nonzeros in each row 604 for (i = 0; i<nrows; i++) { result->row_nnz[i] = 0; } 605 606 // Fill the data arrays 607 if (in_matrix.values) 608 { 609 for (i=0; i<ncols; i++) 610 { 611 r_nnz = in_matrix.row_nnz[i]; 612 irow = in_matrix.icols[i]; 613 val = in_matrix.values[i]; 614 615 if (col_idx_type == SPBAS_COLUMN_NUMBERS) {icol0=0;} 616 else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {icol0=i;} 617 else if (col_idx_type == SPBAS_OFFSET_ARRAY) 618 {icol0=in_matrix.icol0[i];} 619 for (j=0; j<r_nnz; j++) 620 { 621 k = icol0 + irow[j]; 622 result->icols[k][result->row_nnz[k]] = i; 623 result->values[k][result->row_nnz[k]] = val[j]; 624 result->row_nnz[k]++; 625 } 626 } 627 } 628 else 629 { 630 for (i=0; i<ncols; i++) 631 { 632 r_nnz = in_matrix.row_nnz[i]; 633 irow = in_matrix.icols[i]; 634 635 if (col_idx_type == SPBAS_COLUMN_NUMBERS) {icol0=0;} 636 else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {icol0=i;} 637 else if (col_idx_type == SPBAS_OFFSET_ARRAY) 638 {icol0=in_matrix.icol0[i];} 639 640 for (j=0; j<r_nnz; j++) 641 { 642 k = icol0 + irow[j]; 643 result->icols[k][result->row_nnz[k]] = i; 644 result->row_nnz[k]++; 645 } 646 } 647 } 648 649 // Set return value 650 PetscFunctionReturn(0); 651 } 652 653 /* 654 spbas_mergesort 655 656 mergesort for an array of intergers and an array of associated 657 reals 658 659 on output, icol[0..nnz-1] is increasing; 660 val[0..nnz-1] has undergone the same permutation as icol 661 662 NB: val may be NULL: in that case, only the integers are sorted 663 664 */ 665 #undef __FUNCT__ 666 #define __FUNCT__ "spbas_mergesort" 667 PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val) 668 { 669 PetscInt istep; // Chunk-sizes of already sorted parts of arrays 670 PetscInt i, i1, i2; // Loop counters for (partly) sorted arrays 671 PetscInt istart, i1end, i2end; // start of newly sorted array part, end of both parts 672 PetscInt *ialloc; // Allocated arrays 673 PetscScalar *valloc=NULL; 674 PetscInt *iswap; // auxiliary pointers for swapping 675 PetscScalar *vswap; 676 PetscInt *ihlp1; // Pointers to new version of arrays, 677 PetscScalar *vhlp1=NULL; // (arrays under construction) 678 PetscInt *ihlp2; // Pointers to previous version of arrays, 679 PetscScalar *vhlp2=NULL; 680 PetscErrorCode ierr; 681 682 // Create arrays 683 ierr = PetscMalloc(nnz*sizeof(PetscInt),&ialloc); CHKERRQ(ierr); 684 ihlp1 = ialloc; 685 ihlp2 = icol; 686 687 if (val) 688 { 689 ierr = PetscMalloc(nnz*sizeof(PetscScalar),&valloc); CHKERRQ(ierr); 690 vhlp1 = valloc; 691 vhlp2 = val; 692 } 693 694 695 // Sorted array chunks are first 1 long, and increase until they are the 696 // complete array 697 for (istep=1; istep<nnz; istep*=2) 698 { 699 // 700 // Combine sorted parts 701 // istart:istart+istep-1 and istart+istep-1:istart+2*istep-1 702 // of ihlp2 and vhlp2 703 // 704 // into one sorted part 705 // istart:istart+2*istep-1 706 // of ihlp1 and vhlp1 707 // 708 for (istart=0; istart<nnz; istart+=2*istep) 709 { 710 711 // Set counters and bound array part endings 712 i1=istart; i1end = i1+istep; if (i1end>nnz) {i1end=nnz;} 713 i2=istart+istep; i2end = i2+istep; if (i2end>nnz) {i2end=nnz;} 714 715 // Merge the two array parts 716 if (val) 717 { 718 for (i=istart; i<i2end; i++) 719 { 720 if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) 721 { 722 ihlp1[i] = ihlp2[i1]; 723 vhlp1[i] = vhlp2[i1]; 724 i1++; 725 } 726 else if (i2<i2end ) 727 { 728 ihlp1[i] = ihlp2[i2]; 729 vhlp1[i] = vhlp2[i2]; 730 i2++; 731 } 732 else 733 { 734 ihlp1[i] = ihlp2[i1]; 735 vhlp1[i] = vhlp2[i1]; 736 i1++; 737 } 738 } 739 } 740 else 741 { 742 for (i=istart; i<i2end; i++) 743 { 744 if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) 745 { 746 ihlp1[i] = ihlp2[i1]; 747 i1++; 748 } 749 else if (i2<i2end ) 750 { 751 ihlp1[i] = ihlp2[i2]; 752 i2++; 753 } 754 else 755 { 756 ihlp1[i] = ihlp2[i1]; 757 i1++; 758 } 759 } 760 } 761 } 762 763 // Swap the two array sets 764 iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap; 765 vswap = vhlp2; vhlp2 = vhlp1; vhlp1 = vswap; 766 } 767 768 // Copy one more time in case the sorted arrays are the temporary ones 769 if (ihlp2 != icol) 770 { 771 for (i=0; i<nnz; i++) { icol[i] = ihlp2[i]; } 772 if (val) 773 { 774 for (i=0; i<nnz; i++) { val[i] = vhlp2[i]; } 775 } 776 } 777 778 // Remove help arrays 779 ierr = PetscFree(ialloc); CHKERRQ(ierr); 780 if(val){ierr = PetscFree(valloc); CHKERRQ(ierr);} 781 PetscFunctionReturn(0); 782 } 783 784 /* 785 spbas_apply_reordering_rows: 786 apply the given reordering to the rows: matrix_A = matrix_A(perm,:); 787 */ 788 #undef __FUNCT__ 789 #define __FUNCT__ "spbas_apply_reordering_rows" 790 PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation) 791 { 792 PetscInt i,j,ip; 793 PetscInt nrows=matrix_A->nrows; 794 PetscInt * row_nnz; 795 PetscInt **icols; 796 PetscTruth do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE; 797 PetscScalar **vals=NULL; 798 PetscErrorCode ierr; 799 800 PetscFunctionBegin; 801 if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) 802 { 803 SETERRQ( PETSC_ERR_SUP_SYS, 804 "must have diagonal offsets in pattern\n"); 805 } 806 807 if (do_values) 808 { 809 ierr = PetscMalloc( sizeof(PetscScalar*)*nrows, &vals); CHKERRQ(ierr); 810 } 811 ierr = PetscMalloc( sizeof(PetscInt)*nrows, &row_nnz); CHKERRQ(ierr); 812 ierr = PetscMalloc( sizeof(PetscInt*)*nrows, &icols); CHKERRQ(ierr); 813 814 for (i=0; i<nrows;i++) 815 { 816 ip = permutation[i]; 817 if (do_values) {vals[i] = matrix_A->values[ip];} 818 icols[i] = matrix_A->icols[ip]; 819 row_nnz[i] = matrix_A->row_nnz[ip]; 820 for (j=0; j<row_nnz[i]; j++) { icols[i][j] += ip-i; } 821 } 822 823 if (do_values){ ierr = PetscFree(matrix_A->values); CHKERRQ(ierr);} 824 ierr = PetscFree(matrix_A->icols); CHKERRQ(ierr); 825 ierr = PetscFree(matrix_A->row_nnz); CHKERRQ(ierr); 826 827 if (do_values) { matrix_A->values = vals; } 828 matrix_A->icols = icols; 829 matrix_A->row_nnz = row_nnz; 830 831 PetscFunctionReturn(0); 832 } 833 834 835 /* 836 spbas_apply_reordering_cols: 837 apply the given reordering to the columns: matrix_A(:,perm) = matrix_A; 838 */ 839 #undef __FUNCT__ 840 #define __FUNCT__ "spbas_apply_reordering_cols" 841 PetscErrorCode spbas_apply_reordering_cols( spbas_matrix *matrix_A,const PetscInt *permutation) 842 { 843 PetscInt i,j; 844 PetscInt nrows=matrix_A->nrows; 845 PetscInt row_nnz; 846 PetscInt *icols; 847 PetscTruth do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE; 848 PetscScalar *vals=NULL; 849 850 PetscErrorCode ierr; 851 852 PetscFunctionBegin; 853 854 if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) 855 { 856 SETERRQ( PETSC_ERR_SUP_SYS, 857 "must have diagonal offsets in pattern\n"); 858 } 859 860 for (i=0; i<nrows;i++) 861 { 862 icols = matrix_A->icols[i]; 863 row_nnz = matrix_A->row_nnz[i]; 864 if (do_values) { vals = matrix_A->values[i]; } 865 866 for (j=0; j<row_nnz; j++) 867 { 868 icols[j] = permutation[i+icols[j]]-i; 869 } 870 ierr = spbas_mergesort(row_nnz, icols, vals); CHKERRQ(ierr); 871 } 872 873 PetscFunctionReturn(0); 874 } 875 876 /* 877 spbas_apply_reordering: 878 apply the given reordering: matrix_A(perm,perm) = matrix_A; 879 */ 880 #undef __FUNCT__ 881 #define __FUNCT__ "spbas_apply_reordering" 882 PetscErrorCode spbas_apply_reordering( spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt * inv_perm) 883 { 884 PetscErrorCode ierr; 885 PetscFunctionBegin; 886 ierr = spbas_apply_reordering_rows( matrix_A, inv_perm); CHKERRQ(ierr); 887 ierr = spbas_apply_reordering_cols( matrix_A, permutation); CHKERRQ(ierr); 888 PetscFunctionReturn(0); 889 } 890 891 #undef __FUNCT__ 892 #define __FUNCT__ "spbas_pattern_only" 893 PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix * result) 894 { 895 spbas_matrix retval; 896 PetscInt i, j, i0, r_nnz; 897 PetscErrorCode ierr; 898 899 PetscFunctionBegin; 900 901 // Copy input values 902 retval.nrows = nrows; 903 retval.ncols = ncols; 904 retval.nnz = ai[nrows]; 905 906 retval.block_data = PETSC_TRUE; 907 retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS; 908 909 // Allocate output matrix 910 ierr = spbas_allocate_pattern(&retval, PETSC_FALSE); CHKERRQ(ierr); 911 for (i=0; i<nrows; i++) {retval.row_nnz[i] = ai[i+1]-ai[i];} 912 ierr = spbas_allocate_data(&retval); CHKERRQ(ierr); 913 // Copy the structure 914 for (i = 0; i<retval.nrows; i++) 915 { 916 i0 = ai[i]; 917 r_nnz = ai[i+1]-i0; 918 919 for (j=0; j<r_nnz; j++) 920 { 921 retval.icols[i][j] = aj[i0+j]-i; 922 } 923 } 924 925 // Set return value 926 *result = retval; 927 PetscFunctionReturn(0); 928 } 929 930 931 /* 932 spbas_mark_row_power: 933 Mark the columns in row 'row' which are nonzero in 934 matrix^2log(marker). 935 */ 936 #undef __FUNCT__ 937 #define __FUNCT__ "spbas_mark_row_power" 938 PetscErrorCode spbas_mark_row_power( 939 PetscInt *iwork, // marker-vector 940 PetscInt row, // row for which the columns are marked 941 spbas_matrix * in_matrix, // matrix for which the power is being 942 // calculated 943 PetscInt marker, // marker-value: 2^power 944 PetscInt minmrk, // lower bound for marked points 945 PetscInt maxmrk) // upper bound for marked points 946 { 947 PetscErrorCode ierr; 948 PetscInt i,j, nnz; 949 950 PetscFunctionBegin; 951 nnz = in_matrix->row_nnz[row]; 952 953 // For higher powers, call this function recursively 954 if (marker>1) 955 { 956 for (i=0; i<nnz;i++) 957 { 958 j = row + in_matrix->icols[row][i]; 959 if (minmrk<=j && j<maxmrk && iwork[j] < marker ) 960 { 961 ierr = spbas_mark_row_power( iwork, row + in_matrix->icols[row][i], 962 in_matrix, marker/2,minmrk,maxmrk); 963 CHKERRQ(ierr); 964 iwork[j] |= marker; 965 } 966 } 967 } 968 else 969 { 970 // Mark the columns reached 971 for (i=0; i<nnz;i++) 972 { 973 j = row + in_matrix->icols[row][i]; 974 if (minmrk<=j && j<maxmrk) { iwork[j] |= 1; } 975 } 976 } 977 978 PetscFunctionReturn(0); 979 } 980 981 982 /* 983 spbas_power 984 Calculate sparseness patterns for incomplete Cholesky decompositions 985 of a given order: (almost) all nonzeros of the matrix^(order+1) which 986 are inside the band width are found and stored in the output sparseness 987 pattern. 988 */ 989 #undef __FUNCT__ 990 #define __FUNCT__ "spbas_power" 991 PetscErrorCode spbas_power (spbas_matrix in_matrix,PetscInt power, spbas_matrix * result) 992 { 993 spbas_matrix retval; 994 PetscInt nrows = in_matrix.nrows; 995 PetscInt ncols = in_matrix.ncols; 996 PetscInt i, j, kend; 997 PetscInt nnz, inz; 998 PetscInt *iwork; 999 PetscInt marker; 1000 PetscInt maxmrk=0; 1001 PetscErrorCode ierr; 1002 1003 PetscFunctionBegin; 1004 if (in_matrix.col_idx_type != SPBAS_DIAGONAL_OFFSETS) 1005 { 1006 SETERRQ( PETSC_ERR_SUP_SYS, 1007 "must have diagonal offsets in pattern\n"); 1008 } 1009 1010 if (ncols != nrows) 1011 { 1012 SETERRQ( PETSC_ERR_ARG_INCOMP, 1013 "Dimension error\n"); 1014 } 1015 1016 if (in_matrix.values) 1017 { 1018 SETERRQ( PETSC_ERR_ARG_INCOMP, 1019 "Input array must be sparseness pattern (no values)"); 1020 } 1021 1022 if (power<=0) 1023 { 1024 SETERRQ( PETSC_ERR_SUP_SYS, 1025 "Power must be 1 or up"); 1026 } 1027 1028 // Copy input values 1029 retval.nrows = ncols; 1030 retval.ncols = nrows; 1031 retval.nnz = 0; 1032 retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS; 1033 retval.block_data = PETSC_FALSE; 1034 1035 // Allocate sparseness pattern 1036 ierr = spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE); 1037 CHKERRQ(ierr); 1038 1039 // Allocate marker array 1040 ierr = PetscMalloc(nrows * sizeof(PetscInt), &iwork); CHKERRQ(ierr); 1041 1042 // Erase the pattern for this row 1043 memset( (void *) iwork, 0, retval.nrows*sizeof(PetscInt)); 1044 1045 // Calculate marker values 1046 marker = 1; for (i=1; i<power; i++) {marker*=2;} 1047 1048 for (i=0; i<nrows; i++) 1049 { 1050 // Calculate the pattern for each row 1051 1052 nnz = in_matrix.row_nnz[i]; 1053 kend = i+in_matrix.icols[i][nnz-1]; 1054 if (maxmrk<=kend) {maxmrk=kend+1;} 1055 ierr = spbas_mark_row_power( iwork, i, &in_matrix, marker, 1056 i, maxmrk); CHKERRQ(ierr); 1057 1058 // Count the columns 1059 nnz = 0; 1060 for (j=i; j<maxmrk; j++) {nnz+= (iwork[j]!=0);} 1061 1062 // Allocate the column indices 1063 retval.row_nnz[i] = nnz; 1064 ierr = PetscMalloc(nnz*sizeof(PetscInt),&retval.icols[i]); CHKERRQ(ierr); 1065 1066 // Administrate the column indices 1067 inz = 0; 1068 for (j=i; j<maxmrk; j++) 1069 { 1070 if (iwork[j]) 1071 { 1072 retval.icols[i][inz] = j-i; 1073 inz++; 1074 iwork[j]=0; 1075 } 1076 } 1077 retval.nnz += nnz; 1078 }; 1079 1080 ierr = PetscFree(iwork); CHKERRQ(ierr); 1081 1082 *result = retval; 1083 PetscFunctionReturn(0); 1084 } 1085 1086 1087 1088 /* 1089 spbas_keep_upper: 1090 remove the lower part of the matrix: keep the upper part 1091 */ 1092 #undef __FUNCT__ 1093 #define __FUNCT__ "spbas_keep_upper" 1094 PetscErrorCode spbas_keep_upper( spbas_matrix * inout_matrix) 1095 { 1096 PetscInt i, j; 1097 PetscInt jstart; 1098 1099 PetscFunctionBegin; 1100 if (inout_matrix->block_data) 1101 { 1102 SETERRQ( PETSC_ERR_SUP_SYS, 1103 "Not yet for block data matrices\n"); 1104 } 1105 1106 for (i=0; i<inout_matrix->nrows; i++) 1107 { 1108 for (jstart=0; 1109 (jstart<inout_matrix->row_nnz[i]) && 1110 (inout_matrix->icols[i][jstart]<0); jstart++){} 1111 1112 if (jstart>0) 1113 { 1114 for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) 1115 { 1116 inout_matrix->icols[i][j] = 1117 inout_matrix->icols[i][j+jstart]; 1118 } 1119 1120 if (inout_matrix->values) 1121 { 1122 for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) 1123 { 1124 inout_matrix->values[i][j] = 1125 inout_matrix->values[i][j+jstart]; 1126 } 1127 } 1128 1129 inout_matrix->row_nnz[i] -= jstart; 1130 1131 inout_matrix->icols[i] = (PetscInt*) realloc( 1132 (void*) inout_matrix->icols[i], 1133 inout_matrix->row_nnz[i]*sizeof(PetscInt)); 1134 1135 if (inout_matrix->values) 1136 { 1137 inout_matrix->values[i] = (PetscScalar*) realloc( 1138 (void*) inout_matrix->values[i], 1139 inout_matrix->row_nnz[i]*sizeof(PetscScalar)); 1140 } 1141 inout_matrix->nnz -= jstart; 1142 } 1143 } 1144 PetscFunctionReturn(0); 1145 } 1146 1147