/*
  Defines a matrix-vector product for the MATMPIAIJCRL matrix class.
  This class is derived from the MATMPIAIJ class and retains the
  compressed row storage (aka Yale sparse matrix format) but augments
  it with a column-oriented storage that is more efficient for
  matrix-vector products on vector machines.

  CRL stands for constant row length; that is, the same number of columns
  is kept (padded with zeros) for each row of the sparse matrix.

  See src/mat/impls/aij/seq/crl/crl.c for the sequential version.
*/

#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <../src/mat/impls/aij/seq/crl/crl.h>

static PetscErrorCode MatDestroy_MPIAIJCRL(Mat A)
{
  Mat_AIJCRL *aijcrl = (Mat_AIJCRL *)A->spptr;

  PetscFunctionBegin;
  if (aijcrl) {
    PetscCall(PetscFree2(aijcrl->acols, aijcrl->icols));
    PetscCall(VecDestroy(&aijcrl->fwork));
    PetscCall(VecDestroy(&aijcrl->xwork));
    PetscCall(PetscFree(aijcrl->array));
  }
  PetscCall(PetscFree(A->spptr));

  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIAIJ));
  PetscCall(MatDestroy_MPIAIJ(A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMPIAIJCRL_create_aijcrl(Mat A)
{
  Mat_MPIAIJ  *a      = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *Aij    = (Mat_SeqAIJ *)a->A->data, *Bij = (Mat_SeqAIJ *)a->B->data;
  Mat_AIJCRL  *aijcrl = (Mat_AIJCRL *)A->spptr;
  PetscInt     m      = A->rmap->n;       /* number of rows in the local matrix */
  PetscInt     nd     = a->A->cmap->n;    /* number of columns in the diagonal portion */
  PetscInt    *aj = Aij->j, *bj = Bij->j; /* column indices from the CSR representation */
  PetscInt     i, j, rmax = 0, *icols, *ailen = Aij->ilen, *bilen = Bij->ilen;
  PetscScalar *aa = Aij->a, *ba = Bij->a, *acols, *array;

  PetscFunctionBegin;
  /* determine the row with the most columns */
  for (i = 0; i < m; i++) rmax = PetscMax(rmax, ailen[i] + bilen[i]);
  aijcrl->nz   = Aij->nz + Bij->nz;
  aijcrl->m    = A->rmap->n;
  aijcrl->rmax = rmax;

  PetscCall(PetscFree2(aijcrl->acols, aijcrl->icols));
  PetscCall(PetscMalloc2(rmax * m, &aijcrl->acols, rmax * m, &aijcrl->icols));
  acols = aijcrl->acols;
  icols = aijcrl->icols;
  for (i = 0; i < m; i++) {
    for (j = 0; j < ailen[i]; j++) {
      acols[j * m + i] = *aa++;
      icols[j * m + i] = *aj++;
    }
    for (; j < ailen[i] + bilen[i]; j++) {
      acols[j * m + i] = *ba++;
      icols[j * m + i] = nd + *bj++;
    }
    for (; j < rmax; j++) { /* empty column entries */
      acols[j * m + i] = 0.0;
      icols[j * m + i] = (j) ? icols[(j - 1) * m + i] : 0; /* handle case where the row is empty */
    }
  }
  PetscCall(PetscInfo(A, "Percentage of 0's introduced for vectorized multiply %g\n", 1.0 - ((double)aijcrl->nz) / PetscMax((double)rmax * m, 1)));

  PetscCall(PetscFree(aijcrl->array));
  PetscCall(PetscMalloc1(a->B->cmap->n + nd, &array));
  /* xwork and fwork share this array of length nd + B->cmap->n, so the multiply
     kernel can address local and scattered off-process entries of x with the
     single index stored in icols (note the nd + *bj offsets above) */
  PetscCall(VecDestroy(&aijcrl->xwork));
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), 1, nd, PETSC_DECIDE, array, &aijcrl->xwork));
  PetscCall(VecDestroy(&aijcrl->fwork));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, a->B->cmap->n, array + nd, &aijcrl->fwork));

  aijcrl->array = array;
  aijcrl->xscat = a->Mvctx;
  PetscFunctionReturn(PETSC_SUCCESS);
}
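/* Illustration (not part of the library, and assuming for simplicity that the
   off-diagonal block B is empty): for the 3x3 local matrix

     [ 2 0 1 ]
     [ 0 3 0 ]
     [ 4 5 6 ]

   the row lengths are {2, 1, 3}, so rmax = 3 and m = 3. Entry j of row i is
   stored at index j*m + i, giving the column-oriented (ELLPACK-style) arrays

     acols = { 2, 3, 4,   1, 0, 5,   0, 0, 6 }
     icols = { 0, 1, 0,   2, 1, 1,   2, 1, 2 }

   Padded entries carry a zero value and repeat the previous column index so
   the gathers in the vectorized multiply stay in bounds. */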
static PetscErrorCode MatAssemblyEnd_MPIAIJCRL(Mat A, MatAssemblyType mode)
{
  Mat_MPIAIJ *a   = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *Aij = (Mat_SeqAIJ *)a->A->data, *Bij = (Mat_SeqAIJ *)a->B->data;

  PetscFunctionBegin;
  Aij->inode.use = PETSC_FALSE;
  Bij->inode.use = PETSC_FALSE;

  PetscCall(MatAssemblyEnd_MPIAIJ(A, mode));
  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);

  /* Now build the column-oriented (CRL) data structure used by the vectorized multiply. */
  PetscCall(MatMPIAIJCRL_create_aijcrl(A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatMult_AIJCRL(Mat, Vec, Vec);
extern PetscErrorCode MatDuplicate_AIJCRL(Mat, MatDuplicateOption, Mat *);

/* MatConvert_MPIAIJ_MPIAIJCRL converts an MPIAIJ matrix into an
 * MPIAIJCRL matrix. This routine is called by the MatCreate_MPIAIJCRL()
 * routine, but can also be used to convert an assembled MPIAIJ matrix
 * into an MPIAIJCRL one. */

PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
{
  Mat         B = *newmat;
  Mat_AIJCRL *aijcrl;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));

  PetscCall(PetscNew(&aijcrl));
  B->spptr = (void *)aijcrl;

  /* Set function pointers for methods that we inherit from AIJ but override. */
  B->ops->duplicate   = MatDuplicate_AIJCRL;
  B->ops->assemblyend = MatAssemblyEnd_MPIAIJCRL;
  B->ops->destroy     = MatDestroy_MPIAIJCRL;
  B->ops->mult        = MatMult_AIJCRL;

  /* If A has already been assembled, build the CRL data structure now. */
  if (A->assembled) PetscCall(MatMPIAIJCRL_create_aijcrl(B));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJCRL));
  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
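/* Usage sketch (illustrative, not part of this file): an assembled MPIAIJ
   matrix can be converted through the generic MatConvert() interface, either
   in place,

     PetscCall(MatConvert(A, MATMPIAIJCRL, MAT_INPLACE_MATRIX, &A));

   or into a new matrix,

     Mat B;
     PetscCall(MatConvert(A, MATMPIAIJCRL, MAT_INITIAL_MATRIX, &B));
*/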
/*@C
  MatCreateMPIAIJCRL - Creates a sparse matrix of type `MATMPIAIJCRL`.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of rows
. n    - number of columns
. nz   - number of nonzeros per row (same for all rows), for the "diagonal" submatrix
. nnz  - array containing the number of nonzeros in the various rows (possibly different for each row) or `NULL`, for the "diagonal" submatrix
. onz  - number of nonzeros per row (same for all rows), for the "off-diagonal" submatrix
- onnz - array containing the number of nonzeros in the various rows (possibly different for each row) or `NULL`, for the "off-diagonal" submatrix

  Output Parameter:
. A - the matrix

  Level: intermediate

  Notes:
  This type inherits from `MATAIJ`, but stores some additional information that is used to
  allow better vectorization of the matrix-vector product. At the cost of increased storage,
  the AIJ formatted matrix can be copied to a format in which pieces of the matrix are stored
  in ELLPACK format, allowing the vectorized matrix multiply routine to use stride-1 memory
  accesses.

  If `nnz` is given then `nz` is ignored; likewise, if `onnz` is given then `onz` is ignored.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MATAIJ`, `MATAIJSELL`, `MATAIJPERM`, `MATAIJMKL`, `MatCreate()`, `MatCreateMPIAIJPERM()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateMPIAIJCRL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], PetscInt onz, const PetscInt onnz[], Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, m, n));
  PetscCall(MatSetType(*A, MATMPIAIJCRL));
  PetscCall(MatMPIAIJSetPreallocation_MPIAIJ(*A, nz, (PetscInt *)nnz, onz, (PetscInt *)onnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJCRL(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatSetType(A, MATMPIAIJ));
  PetscCall(MatConvert_MPIAIJ_MPIAIJCRL(A, MATMPIAIJCRL, MAT_INPLACE_MATRIX, &A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
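/* Usage sketch (illustrative; the sizes and preallocation counts are made up):

     Mat A;
     PetscCall(MatCreateMPIAIJCRL(PETSC_COMM_WORLD, m, n, 5, NULL, 2, NULL, &A));
     // ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), MatMult() ...
     PetscCall(MatDestroy(&A));

   Alternatively, the type can be selected at runtime with -mat_type mpiaijcrl
   on a matrix configured through MatSetFromOptions(). */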