#include "petsc.h" #include "../src/mat/impls/aij/seq/aij.h" #include "spbas.h" /* spbas_memory_requirement: Calculate the number of bytes needed to store tha matrix */ #undef __FUNCT__ #define __FUNCT__ "spbas_memory_requirement" long int spbas_memory_requirement( spbas_matrix matrix) { // Requirement for long int memreq = 6 * sizeof(PetscInt) + // nrows, ncols, nnz, n_alloc_icol, // n_alloc_val, col_idx_type sizeof(PetscTruth) + // block_data sizeof(PetscScalar**) + // values sizeof(PetscScalar*) + // alloc_val 2 * sizeof(int**) + // icols, icols0 2 * sizeof(PetscInt*) + // row_nnz, alloc_icol matrix.nrows * sizeof(PetscInt) + // row_nnz[*] matrix.nrows * sizeof(PetscInt*); // icols[*] // icol0[*] if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) { memreq += matrix.nrows * sizeof(PetscInt); } // icols[*][*] if (matrix.block_data) { memreq += matrix.n_alloc_icol * sizeof(PetscInt); } else { memreq += matrix.nnz * sizeof(PetscInt); } if (matrix.values) { memreq += matrix.nrows * sizeof(PetscScalar*); // values[*] // values[*][*] if (matrix.block_data) { memreq += matrix.n_alloc_val * sizeof(PetscScalar); } else { memreq += matrix.nnz * sizeof(PetscScalar); } } return memreq; } /* spbas_allocate_pattern: allocate the pattern arrays row_nnz, icols and optionally values */ #undef __FUNCT__ #define __FUNCT__ "spbas_allocate_pattern" PetscErrorCode spbas_allocate_pattern( spbas_matrix * result, PetscTruth do_values) { PetscErrorCode ierr; PetscInt nrows = result->nrows; PetscInt col_idx_type = result->col_idx_type; PetscFunctionBegin; // Allocate sparseness pattern ierr = PetscMalloc(nrows*sizeof(PetscInt),&result->row_nnz); CHKERRQ(ierr); ierr = PetscMalloc(nrows*sizeof(PetscInt*),&result->icols); CHKERRQ(ierr); // If offsets are given wrt an array, create array if (col_idx_type == SPBAS_OFFSET_ARRAY) { ierr = PetscMalloc(nrows*sizeof(PetscInt),&result->icol0); CHKERRQ(ierr); } else { result->icol0 = NULL; } // If values are given, allocate values array if (do_values) { ierr = PetscMalloc(nrows*sizeof(PetscScalar*),&result->values); CHKERRQ(ierr); } else { result->values = NULL; } PetscFunctionReturn(0); } /* spbas_allocate_data: in case of block_data: Allocate the data arrays alloc_icol and optionally alloc_val, set appropriate pointers from icols and values; in case of !block_data: Allocate the arrays icols[i] and optionally values[i] */ #undef __FUNCT__ #define __FUNCT__ "spbas_allocate_data" PetscErrorCode spbas_allocate_data( spbas_matrix * result) { PetscInt i; PetscInt nnz = result->nnz; PetscInt nrows = result->nrows; PetscInt r_nnz; PetscErrorCode ierr; PetscTruth do_values = (result->values != NULL) ? PETSC_TRUE : PETSC_FALSE; PetscTruth block_data = result->block_data; PetscFunctionBegin; if (block_data) { // Allocate the column number array and point to it result->n_alloc_icol = nnz; ierr = PetscMalloc(nnz*sizeof(PetscInt), &result->alloc_icol); CHKERRQ(ierr); result->icols[0]= result->alloc_icol; for (i=1; iicols[i] = result->icols[i-1] + result->row_nnz[i-1]; } // Allocate the value array and point to it if (do_values) { result->n_alloc_val= nnz; ierr = PetscMalloc(nnz*sizeof(PetscScalar), &result->alloc_val); CHKERRQ(ierr); result->values[0]= result->alloc_val; for (i=1; ivalues[i] = result->values[i-1] + result->row_nnz[i-1]; } } } else { for (i=0; irow_nnz[i]; ierr = PetscMalloc(r_nnz*sizeof(PetscInt), &result->icols[i]); CHKERRQ(ierr); } if (do_values) { for (i=0; irow_nnz[i]; ierr = PetscMalloc(r_nnz*sizeof(PetscScalar), &result->values[i]); CHKERRQ(ierr); } } } PetscFunctionReturn(0); } /* spbas_row_order_icol determine if row i1 should come + before row i2 in the sorted rows (return -1), + after (return 1) + is identical (return 0). */ #undef __FUNCT__ #define __FUNCT__ "spbas_row_order_icol" int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in,PetscInt col_idx_type) { PetscInt j; PetscInt nnz1 = irow_in[i1+1] - irow_in[i1]; PetscInt nnz2 = irow_in[i2+1] - irow_in[i2]; PetscInt * icol1 = &icol_in[irow_in[i1]]; PetscInt * icol2 = &icol_in[irow_in[i2]]; if (nnz1nnz2) {return 1;} if (col_idx_type == SPBAS_COLUMN_NUMBERS) { for (j=0; j icol2[j]) {return 1;} } } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) { for (j=0; j icol2[j]-i2) {return 1;} } } else if (col_idx_type == SPBAS_OFFSET_ARRAY) { for (j=1; j icol2[j]-icol2[0]) {return 1;} } } return 0; } /* spbas_mergesort_icols: return a sorting of the rows in which identical sparseness patterns are next to each other */ #undef __FUNCT__ #define __FUNCT__ "spbas_mergesort_icols" PetscErrorCode spbas_mergesort_icols( PetscInt nrows, PetscInt * irow_in, PetscInt * icol_in,PetscInt col_idx_type, PetscInt *isort) { PetscErrorCode ierr; PetscInt istep; // Chunk-sizes of already sorted parts of arrays PetscInt i, i1, i2; // Loop counters for (partly) sorted arrays PetscInt istart, i1end, i2end; // start of newly sorted array part, end of both // parts PetscInt *ialloc; // Allocated arrays PetscInt *iswap; // auxiliary pointers for swapping PetscInt *ihlp1; // Pointers to new version of arrays, PetscInt *ihlp2; // Pointers to previous version of arrays, PetscFunctionBegin; // Create arrays ierr = PetscMalloc(nrows*sizeof(PetscInt),&ialloc); CHKERRQ(ierr); ihlp1 = ialloc; ihlp2 = isort; // Sorted array chunks are first 1 long, and increase until they are the // complete array for (istep=1; istepnrows) {i1end=nrows;} i2=istart+istep; i2end = i2+istep; if (i2end>nrows) {i2end=nrows;} // Merge the two array parts for (i=istart; inrows = nrows; B->ncols = ncols; B->nnz = nnz; B->col_idx_type= col_idx_type; B->block_data = PETSC_TRUE; ierr = spbas_allocate_pattern( B, no_values); CHKERRQ(ierr); // When using an offset array, set it if (col_idx_type==SPBAS_OFFSET_ARRAY) { for (i=0; iicol0[i] = icol_in[irow_in[i]];} } // Allocate the ordering for the rows ierr = PetscMalloc(nrows*sizeof(PetscInt),&isort); CHKERRQ(ierr); ierr = PetscMalloc(nrows*sizeof(PetscInt),&ipoint); CHKERRQ(ierr); ierr = PetscMalloc(nrows*sizeof(PetscTruth),&used); CHKERRQ(ierr); // Initialize the sorting memset((void*) used, 0, nrows*sizeof(PetscTruth)); for (i = 0; irow_nnz[i] = irow_in[i+1]-irow_in[i]; isort[i] = i; ipoint[i]= i; } // Sort the rows so that identical columns will be next to each other ierr = spbas_mergesort_icols( nrows, irow_in, icol_in, col_idx_type, isort); CHKERRQ(ierr); printf("Rows have been sorted for patterns\n"); // Replace identical rows with the first one in the list for (i=1; in_alloc_icol = 0; for (i=0; in_alloc_icol += B->row_nnz[i];} } ierr = PetscMalloc(B->n_alloc_icol*sizeof(PetscInt),&B->alloc_icol); CHKERRQ(ierr); // Fill in the diagonal offsets for the rows which store their own data ptr = 0; for (i=0; inrows; i++) { if (used[i]) { B->icols[i] = &B->alloc_icol[ptr]; icols = &icol_in[irow_in[i]]; row_nnz = B->row_nnz[i]; if (col_idx_type == SPBAS_COLUMN_NUMBERS) { for (j=0; jicols[i][j] = icols[j]; } } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) { for (j=0; jicols[i][j] = icols[j]-i; } } else if (col_idx_type == SPBAS_OFFSET_ARRAY) { for (j=0; jicols[i][j] = icols[j]-icols[0]; } } ptr += B->row_nnz[i]; } } // Point to the right places for all data for (i=0; iicols[i] = B->icols[ipoint[i]]; } printf("Row patterns have been compressed\n"); printf(" (%6.2f nonzeros per row)\n", (PetscReal) nnz / (PetscReal) nrows); // Clean up ierr=PetscFree(isort); CHKERRQ(ierr); ierr=PetscFree(used); CHKERRQ(ierr); ierr=PetscFree(ipoint); CHKERRQ(ierr); mem_compressed = spbas_memory_requirement( *B ); *mem_reduction = 100.0 * (PetscScalar)(mem_orig-mem_compressed)/ (PetscScalar) mem_orig; PetscFunctionReturn(0); } /* spbas_incomplete_cholesky Incomplete Cholesky decomposition */ #include "spbas_cholesky.h" /* spbas_delete : de-allocate the arrays owned by this matrix */ #undef __FUNCT__ #define __FUNCT__ "spbas_delete" PetscErrorCode spbas_delete(spbas_matrix matrix) { PetscInt i; PetscErrorCode ierr; PetscFunctionBegin; if (matrix.block_data) { ierr=PetscFree(matrix.alloc_icol); CHKERRQ(ierr) if (matrix.values){ierr=PetscFree(matrix.alloc_val);CHKERRQ(ierr);} } else { for (i=0; i0 && j0 && j0 && j0 && j0 && j0 && jnrows = nrows; result->ncols = ncols; result->nnz = nnz; result->col_idx_type = SPBAS_COLUMN_NUMBERS; result->block_data = PETSC_TRUE; // Allocate sparseness pattern ierr = spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE); CHKERRQ(ierr); // Count the number of nonzeros in each row for (i = 0; irow_nnz[i] = 0; } for (i=0; irow_nnz[irow[j]]++; } } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) { for (j=0; jrow_nnz[i+irow[j]]++; } } else if (col_idx_type == SPBAS_OFFSET_ARRAY) { icol0=in_matrix.icol0[i]; for (j=0; jrow_nnz[icol0+irow[j]]++; } } } // Set the pointers to the data ierr = spbas_allocate_data(result); CHKERRQ(ierr); // Reset the number of nonzeros in each row for (i = 0; irow_nnz[i] = 0; } // Fill the data arrays if (in_matrix.values) { for (i=0; iicols[k][result->row_nnz[k]] = i; result->values[k][result->row_nnz[k]] = val[j]; result->row_nnz[k]++; } } } else { for (i=0; iicols[k][result->row_nnz[k]] = i; result->row_nnz[k]++; } } } // Set return value PetscFunctionReturn(0); } /* spbas_mergesort mergesort for an array of intergers and an array of associated reals on output, icol[0..nnz-1] is increasing; val[0..nnz-1] has undergone the same permutation as icol NB: val may be NULL: in that case, only the integers are sorted */ #undef __FUNCT__ #define __FUNCT__ "spbas_mergesort" PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val) { PetscInt istep; // Chunk-sizes of already sorted parts of arrays PetscInt i, i1, i2; // Loop counters for (partly) sorted arrays PetscInt istart, i1end, i2end; // start of newly sorted array part, end of both parts PetscInt *ialloc; // Allocated arrays PetscScalar *valloc=NULL; PetscInt *iswap; // auxiliary pointers for swapping PetscScalar *vswap; PetscInt *ihlp1; // Pointers to new version of arrays, PetscScalar *vhlp1=NULL; // (arrays under construction) PetscInt *ihlp2; // Pointers to previous version of arrays, PetscScalar *vhlp2=NULL; PetscErrorCode ierr; // Create arrays ierr = PetscMalloc(nnz*sizeof(PetscInt),&ialloc); CHKERRQ(ierr); ihlp1 = ialloc; ihlp2 = icol; if (val) { ierr = PetscMalloc(nnz*sizeof(PetscScalar),&valloc); CHKERRQ(ierr); vhlp1 = valloc; vhlp2 = val; } // Sorted array chunks are first 1 long, and increase until they are the // complete array for (istep=1; istepnnz) {i1end=nnz;} i2=istart+istep; i2end = i2+istep; if (i2end>nnz) {i2end=nnz;} // Merge the two array parts if (val) { for (i=istart; inrows; PetscInt * row_nnz; PetscInt **icols; PetscTruth do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE; PetscScalar **vals=NULL; PetscErrorCode ierr; PetscFunctionBegin; if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) { SETERRQ( PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern\n"); } if (do_values) { ierr = PetscMalloc( sizeof(PetscScalar*)*nrows, &vals); CHKERRQ(ierr); } ierr = PetscMalloc( sizeof(PetscInt)*nrows, &row_nnz); CHKERRQ(ierr); ierr = PetscMalloc( sizeof(PetscInt*)*nrows, &icols); CHKERRQ(ierr); for (i=0; ivalues[ip];} icols[i] = matrix_A->icols[ip]; row_nnz[i] = matrix_A->row_nnz[ip]; for (j=0; jvalues); CHKERRQ(ierr);} ierr = PetscFree(matrix_A->icols); CHKERRQ(ierr); ierr = PetscFree(matrix_A->row_nnz); CHKERRQ(ierr); if (do_values) { matrix_A->values = vals; } matrix_A->icols = icols; matrix_A->row_nnz = row_nnz; PetscFunctionReturn(0); } /* spbas_apply_reordering_cols: apply the given reordering to the columns: matrix_A(:,perm) = matrix_A; */ #undef __FUNCT__ #define __FUNCT__ "spbas_apply_reordering_cols" PetscErrorCode spbas_apply_reordering_cols( spbas_matrix *matrix_A,const PetscInt *permutation) { PetscInt i,j; PetscInt nrows=matrix_A->nrows; PetscInt row_nnz; PetscInt *icols; PetscTruth do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE; PetscScalar *vals=NULL; PetscErrorCode ierr; PetscFunctionBegin; if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) { SETERRQ( PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern\n"); } for (i=0; iicols[i]; row_nnz = matrix_A->row_nnz[i]; if (do_values) { vals = matrix_A->values[i]; } for (j=0; jrow_nnz[row]; // For higher powers, call this function recursively if (marker>1) { for (i=0; iicols[row][i]; if (minmrk<=j && jicols[row][i], in_matrix, marker/2,minmrk,maxmrk); CHKERRQ(ierr); iwork[j] |= marker; } } } else { // Mark the columns reached for (i=0; iicols[row][i]; if (minmrk<=j && jblock_data) { SETERRQ( PETSC_ERR_SUP_SYS, "Not yet for block data matrices\n"); } for (i=0; inrows; i++) { for (jstart=0; (jstartrow_nnz[i]) && (inout_matrix->icols[i][jstart]<0); jstart++){} if (jstart>0) { for (j=0; jrow_nnz[i]-jstart; j++) { inout_matrix->icols[i][j] = inout_matrix->icols[i][j+jstart]; } if (inout_matrix->values) { for (j=0; jrow_nnz[i]-jstart; j++) { inout_matrix->values[i][j] = inout_matrix->values[i][j+jstart]; } } inout_matrix->row_nnz[i] -= jstart; inout_matrix->icols[i] = (PetscInt*) realloc( (void*) inout_matrix->icols[i], inout_matrix->row_nnz[i]*sizeof(PetscInt)); if (inout_matrix->values) { inout_matrix->values[i] = (PetscScalar*) realloc( (void*) inout_matrix->values[i], inout_matrix->row_nnz[i]*sizeof(PetscScalar)); } inout_matrix->nnz -= jstart; } } PetscFunctionReturn(0); }