1 #include "petsc.h" 2 #include "../src/mat/impls/aij/seq/aij.h" 3 #include "spbas.h" 4 5 6 /* 7 spbas_memory_requirement: 8 Calculate the number of bytes needed to store tha matrix 9 */ 10 #undef __FUNCT__ 11 #define __FUNCT__ "spbas_memory_requirement" 12 long int spbas_memory_requirement( spbas_matrix matrix) 13 { 14 long int memreq = 6 * sizeof(PetscInt) + /* nrows, ncols, nnz, n_alloc_icol, 15 n_alloc_val, col_idx_type */ 16 sizeof(PetscTruth) + /* block_data */ 17 sizeof(PetscScalar**) + /* values */ 18 sizeof(PetscScalar*) + /* alloc_val */ 19 2 * sizeof(int**) + /* icols, icols0 */ 20 2 * sizeof(PetscInt*) + /* row_nnz, alloc_icol */ 21 matrix.nrows * sizeof(PetscInt) + /* row_nnz[*] */ 22 matrix.nrows * sizeof(PetscInt*); /* icols[*] */ 23 24 /* icol0[*] */ 25 if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) { memreq += matrix.nrows * sizeof(PetscInt); } 26 27 /* icols[*][*] */ 28 if (matrix.block_data) { memreq += matrix.n_alloc_icol * sizeof(PetscInt); } 29 else { memreq += matrix.nnz * sizeof(PetscInt); } 30 31 if (matrix.values) { 32 memreq += matrix.nrows * sizeof(PetscScalar*); /* values[*] */ 33 /* values[*][*] */ 34 if (matrix.block_data) { memreq += matrix.n_alloc_val * sizeof(PetscScalar); } 35 else { memreq += matrix.nnz * sizeof(PetscScalar); } 36 } 37 return memreq; 38 } 39 40 /* 41 spbas_allocate_pattern: 42 allocate the pattern arrays row_nnz, icols and optionally values 43 */ 44 #undef __FUNCT__ 45 #define __FUNCT__ "spbas_allocate_pattern" 46 PetscErrorCode spbas_allocate_pattern( spbas_matrix * result, PetscTruth do_values) 47 { 48 PetscErrorCode ierr; 49 PetscInt nrows = result->nrows; 50 PetscInt col_idx_type = result->col_idx_type; 51 52 PetscFunctionBegin; 53 /* Allocate sparseness pattern */ 54 ierr = PetscMalloc(nrows*sizeof(PetscInt),&result->row_nnz);CHKERRQ(ierr); 55 ierr = PetscMalloc(nrows*sizeof(PetscInt*),&result->icols);CHKERRQ(ierr); 56 57 /* If offsets are given wrt an array, create array */ 58 if (col_idx_type == SPBAS_OFFSET_ARRAY) { 59 ierr = PetscMalloc(nrows*sizeof(PetscInt),&result->icol0);CHKERRQ(ierr); 60 } else { 61 result->icol0 = PETSC_NULL; 62 } 63 64 /* If values are given, allocate values array */ 65 if (do_values) { 66 ierr = PetscMalloc(nrows*sizeof(PetscScalar*),&result->values);CHKERRQ(ierr); 67 } else { 68 result->values = PETSC_NULL; 69 } 70 PetscFunctionReturn(0); 71 } 72 73 /* 74 spbas_allocate_data: 75 in case of block_data: 76 Allocate the data arrays alloc_icol and optionally alloc_val, 77 set appropriate pointers from icols and values; 78 in case of !block_data: 79 Allocate the arrays icols[i] and optionally values[i] 80 */ 81 #undef __FUNCT__ 82 #define __FUNCT__ "spbas_allocate_data" 83 PetscErrorCode spbas_allocate_data( spbas_matrix * result) 84 { 85 PetscInt i; 86 PetscInt nnz = result->nnz; 87 PetscInt nrows = result->nrows; 88 PetscInt r_nnz; 89 PetscErrorCode ierr; 90 PetscTruth do_values = (result->values != PETSC_NULL) ? PETSC_TRUE : PETSC_FALSE; 91 PetscTruth block_data = result->block_data; 92 93 PetscFunctionBegin; 94 if (block_data) { 95 /* Allocate the column number array and point to it */ 96 result->n_alloc_icol = nnz; 97 ierr = PetscMalloc(nnz*sizeof(PetscInt), &result->alloc_icol);CHKERRQ(ierr); 98 result->icols[0]= result->alloc_icol; 99 for (i=1; i<nrows; i++) { 100 result->icols[i] = result->icols[i-1] + result->row_nnz[i-1]; 101 } 102 103 /* Allocate the value array and point to it */ 104 if (do_values) { 105 result->n_alloc_val= nnz; 106 ierr = PetscMalloc(nnz*sizeof(PetscScalar), &result->alloc_val);CHKERRQ(ierr); 107 result->values[0]= result->alloc_val; 108 for (i=1; i<nrows; i++) { 109 result->values[i] = result->values[i-1] + result->row_nnz[i-1]; 110 } 111 } 112 } else { 113 for (i=0; i<nrows; i++) { 114 r_nnz = result->row_nnz[i]; 115 ierr = PetscMalloc(r_nnz*sizeof(PetscInt), &result->icols[i]);CHKERRQ(ierr); 116 } 117 if (do_values) { 118 for (i=0; i<nrows; i++) { 119 r_nnz = result->row_nnz[i]; 120 ierr = PetscMalloc(r_nnz*sizeof(PetscScalar), &result->values[i]);CHKERRQ(ierr); 121 } 122 } 123 } 124 PetscFunctionReturn(0); 125 } 126 127 /* 128 spbas_row_order_icol 129 determine if row i1 should come 130 + before row i2 in the sorted rows (return -1), 131 + after (return 1) 132 + is identical (return 0). 133 */ 134 #undef __FUNCT__ 135 #define __FUNCT__ "spbas_row_order_icol" 136 int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in,PetscInt col_idx_type) 137 { 138 PetscInt j; 139 PetscInt nnz1 = irow_in[i1+1] - irow_in[i1]; 140 PetscInt nnz2 = irow_in[i2+1] - irow_in[i2]; 141 PetscInt * icol1 = &icol_in[irow_in[i1]]; 142 PetscInt * icol2 = &icol_in[irow_in[i2]]; 143 144 if (nnz1<nnz2) {return -1;} 145 if (nnz1>nnz2) {return 1;} 146 147 if (col_idx_type == SPBAS_COLUMN_NUMBERS) { 148 for (j=0; j<nnz1; j++) { 149 if (icol1[j]< icol2[j]) {return -1;} 150 if (icol1[j]> icol2[j]) {return 1;} 151 } 152 } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) { 153 for (j=0; j<nnz1; j++) { 154 if (icol1[j]-i1< icol2[j]-i2) {return -1;} 155 if (icol1[j]-i1> icol2[j]-i2) {return 1;} 156 } 157 } else if (col_idx_type == SPBAS_OFFSET_ARRAY) { 158 for (j=1; j<nnz1; j++) { 159 if (icol1[j]-icol1[0] < icol2[j]-icol2[0]) {return -1;} 160 if (icol1[j]-icol1[0] > icol2[j]-icol2[0]) {return 1;} 161 } 162 } 163 return 0; 164 } 165 166 /* 167 spbas_mergesort_icols: 168 return a sorting of the rows in which identical sparseness patterns are 169 next to each other 170 */ 171 #undef __FUNCT__ 172 #define __FUNCT__ "spbas_mergesort_icols" 173 PetscErrorCode spbas_mergesort_icols( PetscInt nrows, PetscInt * irow_in, PetscInt * icol_in,PetscInt col_idx_type, PetscInt *isort) 174 { 175 PetscErrorCode ierr; 176 PetscInt istep; /* Chunk-sizes of already sorted parts of arrays */ 177 PetscInt i, i1, i2; /* Loop counters for (partly) sorted arrays */ 178 PetscInt istart, i1end, i2end; /* start of newly sorted array part, end of both parts */ 179 PetscInt *ialloc; /* Allocated arrays */ 180 PetscInt *iswap; /* auxiliary pointers for swapping */ 181 PetscInt *ihlp1; /* Pointers to new version of arrays, */ 182 PetscInt *ihlp2; /* Pointers to previous version of arrays, */ 183 184 PetscFunctionBegin; 185 186 ierr = PetscMalloc(nrows*sizeof(PetscInt),&ialloc);CHKERRQ(ierr); 187 188 ihlp1 = ialloc; 189 ihlp2 = isort; 190 191 /* Sorted array chunks are first 1 long, and increase until they are the complete array */ 192 for (istep=1; istep<nrows; istep*=2) { 193 /* 194 Combine sorted parts 195 istart:istart+istep-1 and istart+istep-1:istart+2*istep-1 196 of ihlp2 and vhlp2 197 198 into one sorted part 199 istart:istart+2*istep-1 200 of ihlp1 and vhlp1 201 */ 202 for (istart=0; istart<nrows; istart+=2*istep) { 203 204 /* Set counters and bound array part endings */ 205 i1=istart; i1end = i1+istep; if (i1end>nrows) {i1end=nrows;} 206 i2=istart+istep; i2end = i2+istep; if (i2end>nrows) {i2end=nrows;} 207 208 /* Merge the two array parts */ 209 for (i=istart; i<i2end; i++) { 210 if (i1<i1end && i2<i2end && spbas_row_order_icol( ihlp2[i1], ihlp2[i2], irow_in, icol_in, col_idx_type) < 0) { 211 ihlp1[i] = ihlp2[i1]; 212 i1++; 213 } else if (i2<i2end ) { 214 ihlp1[i] = ihlp2[i2]; 215 i2++; 216 } else { 217 ihlp1[i] = ihlp2[i1]; 218 i1++; 219 } 220 } 221 } 222 223 /* Swap the two array sets */ 224 iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap; 225 } 226 227 /* Copy one more time in case the sorted arrays are the temporary ones */ 228 if (ihlp2 != isort) { 229 for (i=0; i<nrows; i++) { isort[i] = ihlp2[i]; } 230 } 231 ierr = PetscFree(ialloc);CHKERRQ(ierr); 232 PetscFunctionReturn(0); 233 } 234 235 236 237 /* 238 spbas_compress_pattern: 239 calculate a compressed sparseness pattern for a sparseness pattern 240 given in compressed row storage. The compressed sparseness pattern may 241 require (much) less memory. 242 */ 243 #undef __FUNCT__ 244 #define __FUNCT__ "spbas_compress_pattern" 245 PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols, PetscInt col_idx_type, spbas_matrix *B,PetscReal *mem_reduction) 246 { 247 PetscInt nnz = irow_in[nrows]; 248 long int mem_orig = (nrows + nnz) * sizeof(PetscInt); 249 long int mem_compressed; 250 PetscErrorCode ierr; 251 PetscInt *isort; 252 PetscInt *icols; 253 PetscInt row_nnz; 254 PetscInt *ipoint; 255 PetscTruth *used; 256 PetscInt ptr; 257 PetscInt i,j; 258 const PetscTruth no_values = PETSC_FALSE; 259 260 PetscFunctionBegin; 261 /* Allocate the structure of the new matrix */ 262 B->nrows = nrows; 263 B->ncols = ncols; 264 B->nnz = nnz; 265 B->col_idx_type= col_idx_type; 266 B->block_data = PETSC_TRUE; 267 ierr = spbas_allocate_pattern( B, no_values);CHKERRQ(ierr); 268 269 /* When using an offset array, set it */ 270 if (col_idx_type==SPBAS_OFFSET_ARRAY) { 271 for (i=0; i<nrows; i++) {B->icol0[i] = icol_in[irow_in[i]];} 272 } 273 274 /* Allocate the ordering for the rows */ 275 ierr = PetscMalloc(nrows*sizeof(PetscInt),&isort);CHKERRQ(ierr); 276 ierr = PetscMalloc(nrows*sizeof(PetscInt),&ipoint);CHKERRQ(ierr); 277 ierr = PetscMalloc(nrows*sizeof(PetscTruth),&used);CHKERRQ(ierr); 278 279 /* Initialize the sorting */ 280 ierr = PetscMemzero((void*) used, nrows*sizeof(PetscTruth));CHKERRQ(ierr); 281 for (i = 0; i<nrows; i++) { 282 B->row_nnz[i] = irow_in[i+1]-irow_in[i]; 283 isort[i] = i; 284 ipoint[i]= i; 285 } 286 287 /* Sort the rows so that identical columns will be next to each other */ 288 ierr = spbas_mergesort_icols( nrows, irow_in, icol_in, col_idx_type, isort);CHKERRQ(ierr); 289 ierr = PetscInfo(PETSC_NULL,"Rows have been sorted for patterns\n");CHKERRQ(ierr); 290 291 /* Replace identical rows with the first one in the list */ 292 for (i=1; i<nrows; i++) { 293 if (spbas_row_order_icol(isort[i-1], isort[i], irow_in, icol_in, col_idx_type) == 0) { 294 ipoint[isort[i]] = ipoint[isort[i-1]]; 295 } 296 } 297 298 /* Collect the rows which are used*/ 299 for (i=0; i<nrows; i++) {used[ipoint[i]] = PETSC_TRUE;} 300 301 /* Calculate needed memory */ 302 B->n_alloc_icol = 0; 303 for (i=0; i<nrows; i++) { 304 if (used[i]) {B->n_alloc_icol += B->row_nnz[i];} 305 } 306 ierr = PetscMalloc(B->n_alloc_icol*sizeof(PetscInt),&B->alloc_icol);CHKERRQ(ierr); 307 308 /* Fill in the diagonal offsets for the rows which store their own data */ 309 ptr = 0; 310 for (i=0; i<B->nrows; i++) { 311 if (used[i]) { 312 B->icols[i] = &B->alloc_icol[ptr]; 313 icols = &icol_in[irow_in[i]]; 314 row_nnz = B->row_nnz[i]; 315 if (col_idx_type == SPBAS_COLUMN_NUMBERS) { 316 for (j=0; j<row_nnz; j++) { 317 B->icols[i][j] = icols[j]; 318 } 319 } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) { 320 for (j=0; j<row_nnz; j++) { 321 B->icols[i][j] = icols[j]-i; 322 } 323 } else if (col_idx_type == SPBAS_OFFSET_ARRAY) { 324 for (j=0; j<row_nnz; j++) { 325 B->icols[i][j] = icols[j]-icols[0]; 326 } 327 } 328 ptr += B->row_nnz[i]; 329 } 330 } 331 332 /* Point to the right places for all data */ 333 for (i=0; i<nrows; i++) { 334 B->icols[i] = B->icols[ipoint[i]]; 335 } 336 ierr = PetscInfo(PETSC_NULL,"Row patterns have been compressed\n");CHKERRQ(ierr); 337 ierr = PetscInfo1(PETSC_NULL," (%G nonzeros per row)\n", (PetscReal) nnz / (PetscReal) nrows);CHKERRQ(ierr); 338 339 ierr=PetscFree(isort);CHKERRQ(ierr); 340 ierr=PetscFree(used);CHKERRQ(ierr); 341 ierr=PetscFree(ipoint);CHKERRQ(ierr); 342 343 mem_compressed = spbas_memory_requirement( *B ); 344 *mem_reduction = 100.0 * (PetscReal)(mem_orig-mem_compressed)/ (PetscReal) mem_orig; 345 PetscFunctionReturn(0); 346 } 347 348 /* 349 spbas_incomplete_cholesky 350 Incomplete Cholesky decomposition 351 */ 352 #include "spbas_cholesky.h" 353 354 /* 355 spbas_delete : de-allocate the arrays owned by this matrix 356 */ 357 #undef __FUNCT__ 358 #define __FUNCT__ "spbas_delete" 359 PetscErrorCode spbas_delete(spbas_matrix matrix) 360 { 361 PetscInt i; 362 PetscErrorCode ierr; 363 364 PetscFunctionBegin; 365 if (matrix.block_data) { 366 ierr=PetscFree(matrix.alloc_icol);CHKERRQ(ierr) 367 if (matrix.values){ierr=PetscFree(matrix.alloc_val);CHKERRQ(ierr);} 368 } else { 369 for (i=0; i<matrix.nrows; i++) { ierr=PetscFree(matrix.icols[i]);CHKERRQ(ierr);} 370 ierr = PetscFree(matrix.icols);CHKERRQ(ierr); 371 if (matrix.values) { 372 for (i=0; i<matrix.nrows; i++) { ierr=PetscFree(matrix.values[i]);CHKERRQ(ierr);} 373 } 374 } 375 376 ierr=PetscFree(matrix.row_nnz);CHKERRQ(ierr); 377 ierr=PetscFree(matrix.icols);CHKERRQ(ierr); 378 if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) {ierr=PetscFree(matrix.icol0);CHKERRQ(ierr);} 379 if (matrix.values) {ierr=PetscFree(matrix.values);CHKERRQ(ierr);} 380 PetscFunctionReturn(0); 381 } 382 383 /* 384 spbas_matrix_to_crs: 385 Convert an spbas_matrix to compessed row storage 386 */ 387 #undef __FUNCT__ 388 #define __FUNCT__ "spbas_matrix_to_crs" 389 PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A,MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out) 390 { 391 PetscInt nrows = matrix_A.nrows; 392 PetscInt nnz = matrix_A.nnz; 393 PetscInt i,j,r_nnz,i0; 394 PetscInt *irow; 395 PetscInt *icol; 396 PetscInt *icol_A; 397 MatScalar *val; 398 PetscScalar *val_A; 399 PetscInt col_idx_type = matrix_A.col_idx_type; 400 PetscTruth do_values = matrix_A.values ? PETSC_TRUE : PETSC_FALSE; 401 PetscErrorCode ierr; 402 403 PetscFunctionBegin; 404 ierr = PetscMalloc( sizeof(PetscInt) * (nrows+1), &irow);CHKERRQ(ierr); 405 ierr = PetscMalloc( sizeof(PetscInt) * nnz, &icol);CHKERRQ(ierr); 406 *icol_out = icol; 407 *irow_out=irow; 408 if (do_values) { 409 ierr = PetscMalloc( sizeof(MatScalar) * nnz, &val);CHKERRQ(ierr); 410 *val_out = val; *icol_out = icol; *irow_out=irow; 411 } 412 413 irow[0]=0; 414 for (i=0; i<nrows; i++) { 415 r_nnz = matrix_A.row_nnz[i]; 416 i0 = irow[i]; 417 irow[i+1] = i0 + r_nnz; 418 icol_A = matrix_A.icols[i]; 419 420 if (do_values) { 421 val_A = matrix_A.values[i]; 422 for (j=0; j<r_nnz; j++) { 423 icol[i0+j] = icol_A[j]; 424 val[i0+j] = val_A[j]; 425 } 426 } else { 427 for (j=0; j<r_nnz; j++) { icol[i0+j] = icol_A[j]; } 428 } 429 430 if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) { 431 for (j=0; j<r_nnz; j++) { icol[i0+j] += i; } 432 } 433 else if (col_idx_type == SPBAS_OFFSET_ARRAY) { 434 i0 = matrix_A.icol0[i]; 435 for (j=0; j<r_nnz; j++) { icol[i0+j] += i0; } 436 } 437 } 438 PetscFunctionReturn(0); 439 } 440 441 442 /* 443 spbas_transpose 444 return the transpose of a matrix 445 */ 446 #undef __FUNCT__ 447 #define __FUNCT__ "spbas_transpose" 448 PetscErrorCode spbas_transpose( spbas_matrix in_matrix, spbas_matrix * result) 449 { 450 PetscInt col_idx_type = in_matrix.col_idx_type; 451 PetscInt nnz = in_matrix.nnz; 452 PetscInt ncols = in_matrix.nrows; 453 PetscInt nrows = in_matrix.ncols; 454 PetscInt i,j,k; 455 PetscInt r_nnz; 456 PetscInt *irow; 457 PetscInt icol0; 458 PetscScalar * val; 459 PetscErrorCode ierr; 460 461 PetscFunctionBegin; 462 463 /* Copy input values */ 464 result->nrows = nrows; 465 result->ncols = ncols; 466 result->nnz = nnz; 467 result->col_idx_type = SPBAS_COLUMN_NUMBERS; 468 result->block_data = PETSC_TRUE; 469 470 /* Allocate sparseness pattern */ 471 ierr = spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr); 472 473 /* Count the number of nonzeros in each row */ 474 for (i = 0; i<nrows; i++) { result->row_nnz[i] = 0; } 475 476 for (i=0; i<ncols; i++) { 477 r_nnz = in_matrix.row_nnz[i]; 478 irow = in_matrix.icols[i]; 479 if (col_idx_type == SPBAS_COLUMN_NUMBERS) { 480 for (j=0; j<r_nnz; j++) { result->row_nnz[irow[j]]++; } 481 } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) { 482 for (j=0; j<r_nnz; j++) { result->row_nnz[i+irow[j]]++; } 483 } else if (col_idx_type == SPBAS_OFFSET_ARRAY) { 484 icol0=in_matrix.icol0[i]; 485 for (j=0; j<r_nnz; j++) { result->row_nnz[icol0+irow[j]]++; } 486 } 487 } 488 489 /* Set the pointers to the data */ 490 ierr = spbas_allocate_data(result);CHKERRQ(ierr); 491 492 /* Reset the number of nonzeros in each row */ 493 for (i = 0; i<nrows; i++) { result->row_nnz[i] = 0; } 494 495 /* Fill the data arrays */ 496 if (in_matrix.values) { 497 for (i=0; i<ncols; i++) { 498 r_nnz = in_matrix.row_nnz[i]; 499 irow = in_matrix.icols[i]; 500 val = in_matrix.values[i]; 501 502 if (col_idx_type == SPBAS_COLUMN_NUMBERS) {icol0=0;} 503 else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {icol0=i;} 504 else if (col_idx_type == SPBAS_OFFSET_ARRAY) 505 {icol0=in_matrix.icol0[i];} 506 for (j=0; j<r_nnz; j++) { 507 k = icol0 + irow[j]; 508 result->icols[k][result->row_nnz[k]] = i; 509 result->values[k][result->row_nnz[k]] = val[j]; 510 result->row_nnz[k]++; 511 } 512 } 513 } else { 514 for (i=0; i<ncols; i++) { 515 r_nnz = in_matrix.row_nnz[i]; 516 irow = in_matrix.icols[i]; 517 518 if (col_idx_type == SPBAS_COLUMN_NUMBERS) {icol0=0;} 519 else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {icol0=i;} 520 else if (col_idx_type == SPBAS_OFFSET_ARRAY) {icol0=in_matrix.icol0[i];} 521 522 for (j=0; j<r_nnz; j++) { 523 k = icol0 + irow[j]; 524 result->icols[k][result->row_nnz[k]] = i; 525 result->row_nnz[k]++; 526 } 527 } 528 } 529 PetscFunctionReturn(0); 530 } 531 532 /* 533 spbas_mergesort 534 535 mergesort for an array of intergers and an array of associated 536 reals 537 538 on output, icol[0..nnz-1] is increasing; 539 val[0..nnz-1] has undergone the same permutation as icol 540 541 NB: val may be PETSC_NULL: in that case, only the integers are sorted 542 543 */ 544 #undef __FUNCT__ 545 #define __FUNCT__ "spbas_mergesort" 546 PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val) 547 { 548 PetscInt istep; /* Chunk-sizes of already sorted parts of arrays */ 549 PetscInt i, i1, i2; /* Loop counters for (partly) sorted arrays */ 550 PetscInt istart, i1end, i2end; /* start of newly sorted array part, end of both parts */ 551 PetscInt *ialloc; /* Allocated arrays */ 552 PetscScalar *valloc=PETSC_NULL; 553 PetscInt *iswap; /* auxiliary pointers for swapping */ 554 PetscScalar *vswap; 555 PetscInt *ihlp1; /* Pointers to new version of arrays, */ 556 PetscScalar *vhlp1=PETSC_NULL; /* (arrays under construction) */ 557 PetscInt *ihlp2; /* Pointers to previous version of arrays, */ 558 PetscScalar *vhlp2=PETSC_NULL; 559 PetscErrorCode ierr; 560 561 ierr = PetscMalloc(nnz*sizeof(PetscInt),&ialloc);CHKERRQ(ierr); 562 ihlp1 = ialloc; 563 ihlp2 = icol; 564 565 if (val) { 566 ierr = PetscMalloc(nnz*sizeof(PetscScalar),&valloc);CHKERRQ(ierr); 567 vhlp1 = valloc; 568 vhlp2 = val; 569 } 570 571 572 /* Sorted array chunks are first 1 long, and increase until they are the complete array */ 573 for (istep=1; istep<nnz; istep*=2) { 574 /* 575 Combine sorted parts 576 istart:istart+istep-1 and istart+istep-1:istart+2*istep-1 577 of ihlp2 and vhlp2 578 579 into one sorted part 580 istart:istart+2*istep-1 581 of ihlp1 and vhlp1 582 */ 583 for (istart=0; istart<nnz; istart+=2*istep) { 584 585 /* Set counters and bound array part endings */ 586 i1=istart; i1end = i1+istep; if (i1end>nnz) {i1end=nnz;} 587 i2=istart+istep; i2end = i2+istep; if (i2end>nnz) {i2end=nnz;} 588 589 /* Merge the two array parts */ 590 if (val) { 591 for (i=istart; i<i2end; i++) { 592 if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) { 593 ihlp1[i] = ihlp2[i1]; 594 vhlp1[i] = vhlp2[i1]; 595 i1++; 596 } else if (i2<i2end ){ 597 ihlp1[i] = ihlp2[i2]; 598 vhlp1[i] = vhlp2[i2]; 599 i2++; 600 } else { 601 ihlp1[i] = ihlp2[i1]; 602 vhlp1[i] = vhlp2[i1]; 603 i1++; 604 } 605 } 606 } else { 607 for (i=istart; i<i2end; i++) { 608 if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) { 609 ihlp1[i] = ihlp2[i1]; 610 i1++; 611 } else if (i2<i2end ) { 612 ihlp1[i] = ihlp2[i2]; 613 i2++; 614 } else { 615 ihlp1[i] = ihlp2[i1]; 616 i1++; 617 } 618 } 619 } 620 } 621 622 /* Swap the two array sets */ 623 iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap; 624 vswap = vhlp2; vhlp2 = vhlp1; vhlp1 = vswap; 625 } 626 627 /* Copy one more time in case the sorted arrays are the temporary ones */ 628 if (ihlp2 != icol) { 629 for (i=0; i<nnz; i++) { icol[i] = ihlp2[i]; } 630 if (val) { 631 for (i=0; i<nnz; i++) { val[i] = vhlp2[i]; } 632 } 633 } 634 635 ierr = PetscFree(ialloc);CHKERRQ(ierr); 636 if(val){ierr = PetscFree(valloc);CHKERRQ(ierr);} 637 PetscFunctionReturn(0); 638 } 639 640 /* 641 spbas_apply_reordering_rows: 642 apply the given reordering to the rows: matrix_A = matrix_A(perm,:); 643 */ 644 #undef __FUNCT__ 645 #define __FUNCT__ "spbas_apply_reordering_rows" 646 PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation) 647 { 648 PetscInt i,j,ip; 649 PetscInt nrows=matrix_A->nrows; 650 PetscInt * row_nnz; 651 PetscInt **icols; 652 PetscTruth do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE; 653 PetscScalar **vals=PETSC_NULL; 654 PetscErrorCode ierr; 655 656 PetscFunctionBegin; 657 if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ( PETSC_ERR_SUP_SYS,"must have diagonal offsets in pattern\n"); 658 659 if (do_values) { 660 ierr = PetscMalloc( sizeof(PetscScalar*)*nrows, &vals);CHKERRQ(ierr); 661 } 662 ierr = PetscMalloc( sizeof(PetscInt)*nrows, &row_nnz);CHKERRQ(ierr); 663 ierr = PetscMalloc( sizeof(PetscInt*)*nrows, &icols);CHKERRQ(ierr); 664 665 for (i=0; i<nrows;i++) { 666 ip = permutation[i]; 667 if (do_values) {vals[i] = matrix_A->values[ip];} 668 icols[i] = matrix_A->icols[ip]; 669 row_nnz[i] = matrix_A->row_nnz[ip]; 670 for (j=0; j<row_nnz[i]; j++) { icols[i][j] += ip-i; } 671 } 672 673 if (do_values){ ierr = PetscFree(matrix_A->values);CHKERRQ(ierr);} 674 ierr = PetscFree(matrix_A->icols);CHKERRQ(ierr); 675 ierr = PetscFree(matrix_A->row_nnz);CHKERRQ(ierr); 676 677 if (do_values) { matrix_A->values = vals; } 678 matrix_A->icols = icols; 679 matrix_A->row_nnz = row_nnz; 680 681 PetscFunctionReturn(0); 682 } 683 684 685 /* 686 spbas_apply_reordering_cols: 687 apply the given reordering to the columns: matrix_A(:,perm) = matrix_A; 688 */ 689 #undef __FUNCT__ 690 #define __FUNCT__ "spbas_apply_reordering_cols" 691 PetscErrorCode spbas_apply_reordering_cols( spbas_matrix *matrix_A,const PetscInt *permutation) 692 { 693 PetscInt i,j; 694 PetscInt nrows=matrix_A->nrows; 695 PetscInt row_nnz; 696 PetscInt *icols; 697 PetscTruth do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE; 698 PetscScalar *vals=PETSC_NULL; 699 PetscErrorCode ierr; 700 701 PetscFunctionBegin; 702 if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ( PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern\n"); 703 704 for (i=0; i<nrows;i++) { 705 icols = matrix_A->icols[i]; 706 row_nnz = matrix_A->row_nnz[i]; 707 if (do_values) { vals = matrix_A->values[i]; } 708 709 for (j=0; j<row_nnz; j++) { 710 icols[j] = permutation[i+icols[j]]-i; 711 } 712 ierr = spbas_mergesort(row_nnz, icols, vals);CHKERRQ(ierr); 713 } 714 PetscFunctionReturn(0); 715 } 716 717 /* 718 spbas_apply_reordering: 719 apply the given reordering: matrix_A(perm,perm) = matrix_A; 720 */ 721 #undef __FUNCT__ 722 #define __FUNCT__ "spbas_apply_reordering" 723 PetscErrorCode spbas_apply_reordering( spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt * inv_perm) 724 { 725 PetscErrorCode ierr; 726 PetscFunctionBegin; 727 ierr = spbas_apply_reordering_rows( matrix_A, inv_perm);CHKERRQ(ierr); 728 ierr = spbas_apply_reordering_cols( matrix_A, permutation);CHKERRQ(ierr); 729 PetscFunctionReturn(0); 730 } 731 732 #undef __FUNCT__ 733 #define __FUNCT__ "spbas_pattern_only" 734 PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix * result) 735 { 736 spbas_matrix retval; 737 PetscInt i, j, i0, r_nnz; 738 PetscErrorCode ierr; 739 740 PetscFunctionBegin; 741 742 /* Copy input values */ 743 retval.nrows = nrows; 744 retval.ncols = ncols; 745 retval.nnz = ai[nrows]; 746 747 retval.block_data = PETSC_TRUE; 748 retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS; 749 750 /* Allocate output matrix */ 751 ierr = spbas_allocate_pattern(&retval, PETSC_FALSE);CHKERRQ(ierr); 752 for (i=0; i<nrows; i++) {retval.row_nnz[i] = ai[i+1]-ai[i];} 753 ierr = spbas_allocate_data(&retval);CHKERRQ(ierr); 754 /* Copy the structure */ 755 for (i = 0; i<retval.nrows; i++) { 756 i0 = ai[i]; 757 r_nnz = ai[i+1]-i0; 758 759 for (j=0; j<r_nnz; j++) { 760 retval.icols[i][j] = aj[i0+j]-i; 761 } 762 } 763 *result = retval; 764 PetscFunctionReturn(0); 765 } 766 767 768 /* 769 spbas_mark_row_power: 770 Mark the columns in row 'row' which are nonzero in 771 matrix^2log(marker). 772 */ 773 #undef __FUNCT__ 774 #define __FUNCT__ "spbas_mark_row_power" 775 PetscErrorCode spbas_mark_row_power( 776 PetscInt *iwork, /* marker-vector */ 777 PetscInt row, /* row for which the columns are marked */ 778 spbas_matrix * in_matrix, /* matrix for which the power is being calculated */ 779 PetscInt marker, /* marker-value: 2^power */ 780 PetscInt minmrk, /* lower bound for marked points */ 781 PetscInt maxmrk) /* upper bound for marked points */ 782 { 783 PetscErrorCode ierr; 784 PetscInt i,j, nnz; 785 786 PetscFunctionBegin; 787 nnz = in_matrix->row_nnz[row]; 788 789 /* For higher powers, call this function recursively */ 790 if (marker>1) { 791 for (i=0; i<nnz;i++) { 792 j = row + in_matrix->icols[row][i]; 793 if (minmrk<=j && j<maxmrk && iwork[j] < marker ) { 794 ierr = spbas_mark_row_power( iwork, row + in_matrix->icols[row][i],in_matrix, marker/2,minmrk,maxmrk);CHKERRQ(ierr); 795 iwork[j] |= marker; 796 } 797 } 798 } else { 799 /* Mark the columns reached */ 800 for (i=0; i<nnz;i++) { 801 j = row + in_matrix->icols[row][i]; 802 if (minmrk<=j && j<maxmrk) { iwork[j] |= 1; } 803 } 804 } 805 PetscFunctionReturn(0); 806 } 807 808 809 /* 810 spbas_power 811 Calculate sparseness patterns for incomplete Cholesky decompositions 812 of a given order: (almost) all nonzeros of the matrix^(order+1) which 813 are inside the band width are found and stored in the output sparseness 814 pattern. 815 */ 816 #undef __FUNCT__ 817 #define __FUNCT__ "spbas_power" 818 PetscErrorCode spbas_power (spbas_matrix in_matrix,PetscInt power, spbas_matrix * result) 819 { 820 spbas_matrix retval; 821 PetscInt nrows = in_matrix.nrows; 822 PetscInt ncols = in_matrix.ncols; 823 PetscInt i, j, kend; 824 PetscInt nnz, inz; 825 PetscInt *iwork; 826 PetscInt marker; 827 PetscInt maxmrk=0; 828 PetscErrorCode ierr; 829 830 PetscFunctionBegin; 831 if (in_matrix.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ( PETSC_ERR_SUP_SYS,"must have diagonal offsets in pattern\n"); 832 if (ncols != nrows) SETERRQ( PETSC_ERR_ARG_INCOMP, "Dimension error\n"); 833 if (in_matrix.values) SETERRQ( PETSC_ERR_ARG_INCOMP, "Input array must be sparseness pattern (no values)"); 834 if (power<=0) SETERRQ( PETSC_ERR_SUP_SYS, "Power must be 1 or up"); 835 836 /* Copy input values*/ 837 retval.nrows = ncols; 838 retval.ncols = nrows; 839 retval.nnz = 0; 840 retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS; 841 retval.block_data = PETSC_FALSE; 842 843 /* Allocate sparseness pattern */ 844 ierr = spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr); 845 846 /* Allocate marker array */ 847 ierr = PetscMalloc(nrows * sizeof(PetscInt), &iwork);CHKERRQ(ierr); 848 849 /* Erase the pattern for this row */ 850 ierr = PetscMemzero( (void *) iwork, retval.nrows*sizeof(PetscInt));CHKERRQ(ierr); 851 852 /* Calculate marker values */ 853 marker = 1; for (i=1; i<power; i++) {marker*=2;} 854 855 for (i=0; i<nrows; i++) { 856 /* Calculate the pattern for each row */ 857 858 nnz = in_matrix.row_nnz[i]; 859 kend = i+in_matrix.icols[i][nnz-1]; 860 if (maxmrk<=kend) {maxmrk=kend+1;} 861 ierr = spbas_mark_row_power( iwork, i, &in_matrix, marker, i, maxmrk);CHKERRQ(ierr); 862 863 /* Count the columns*/ 864 nnz = 0; 865 for (j=i; j<maxmrk; j++) {nnz+= (iwork[j]!=0);} 866 867 /* Allocate the column indices */ 868 retval.row_nnz[i] = nnz; 869 ierr = PetscMalloc(nnz*sizeof(PetscInt),&retval.icols[i]);CHKERRQ(ierr); 870 871 /* Administrate the column indices */ 872 inz = 0; 873 for (j=i; j<maxmrk; j++) { 874 if (iwork[j]) { 875 retval.icols[i][inz] = j-i; 876 inz++; 877 iwork[j]=0; 878 } 879 } 880 retval.nnz += nnz; 881 }; 882 ierr = PetscFree(iwork);CHKERRQ(ierr); 883 *result = retval; 884 PetscFunctionReturn(0); 885 } 886 887 888 889 /* 890 spbas_keep_upper: 891 remove the lower part of the matrix: keep the upper part 892 */ 893 #undef __FUNCT__ 894 #define __FUNCT__ "spbas_keep_upper" 895 PetscErrorCode spbas_keep_upper( spbas_matrix * inout_matrix) 896 { 897 PetscInt i, j; 898 PetscInt jstart; 899 900 PetscFunctionBegin; 901 if (inout_matrix->block_data) SETERRQ( PETSC_ERR_SUP_SYS, "Not yet for block data matrices\n"); 902 for (i=0; i<inout_matrix->nrows; i++) { 903 for (jstart=0; (jstart<inout_matrix->row_nnz[i]) && (inout_matrix->icols[i][jstart]<0); jstart++){} 904 if (jstart>0) { 905 for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) { 906 inout_matrix->icols[i][j] = inout_matrix->icols[i][j+jstart]; 907 } 908 909 if (inout_matrix->values) { 910 for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) { 911 inout_matrix->values[i][j] = inout_matrix->values[i][j+jstart]; 912 } 913 } 914 915 inout_matrix->row_nnz[i] -= jstart; 916 917 inout_matrix->icols[i] = (PetscInt*) realloc((void*) inout_matrix->icols[i], inout_matrix->row_nnz[i]*sizeof(PetscInt)); 918 919 if (inout_matrix->values) { 920 inout_matrix->values[i] = (PetscScalar*) realloc((void*) inout_matrix->values[i], inout_matrix->row_nnz[i]*sizeof(PetscScalar)); 921 } 922 inout_matrix->nnz -= jstart; 923 } 924 } 925 PetscFunctionReturn(0); 926 } 927 928