1 /* 2 Defines a matrix-vector product for the MATSEQAIJCRL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but augments 5 it with a column oriented storage that is more efficient for 6 matrix vector products on Vector machines. 7 8 CRL stands for constant row length (that is the same number of columns 9 is kept (padded with zeros) for each row of the sparse matrix. 10 */ 11 #include <../src/mat/impls/aij/seq/crl/crl.h> 12 13 static PetscErrorCode MatDestroy_SeqAIJCRL(Mat A) 14 { 15 Mat_AIJCRL *aijcrl = (Mat_AIJCRL *)A->spptr; 16 17 PetscFunctionBegin; 18 /* Free everything in the Mat_AIJCRL data structure. */ 19 if (aijcrl) PetscCall(PetscFree2(aijcrl->acols, aijcrl->icols)); 20 PetscCall(PetscFree(A->spptr)); 21 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ)); 22 PetscCall(MatDestroy_SeqAIJ(A)); 23 PetscFunctionReturn(PETSC_SUCCESS); 24 } 25 26 PetscErrorCode MatDuplicate_AIJCRL(Mat A, MatDuplicateOption op, Mat *M) 27 { 28 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot duplicate AIJCRL matrices yet"); 29 } 30 31 static PetscErrorCode MatSeqAIJCRL_create_aijcrl(Mat A) 32 { 33 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 34 Mat_AIJCRL *aijcrl = (Mat_AIJCRL *)A->spptr; 35 PetscInt m = A->rmap->n; /* Number of rows in the matrix. */ 36 PetscInt *aj = a->j; /* From the CSR representation; points to the beginning of each row. */ 37 PetscInt i, j, rmax = a->rmax, *icols, *ilen = a->ilen; 38 MatScalar *aa = a->a; 39 PetscScalar *acols; 40 41 PetscFunctionBegin; 42 aijcrl->nz = a->nz; 43 aijcrl->m = A->rmap->n; 44 aijcrl->rmax = rmax; 45 46 PetscCall(PetscFree2(aijcrl->acols, aijcrl->icols)); 47 PetscCall(PetscMalloc2(rmax * m, &aijcrl->acols, rmax * m, &aijcrl->icols)); 48 acols = aijcrl->acols; 49 icols = aijcrl->icols; 50 for (i = 0; i < m; i++) { 51 for (j = 0; j < ilen[i]; j++) { 52 acols[j * m + i] = *aa++; 53 icols[j * m + i] = *aj++; 54 } 55 for (; j < rmax; j++) { /* empty column entries */ 56 acols[j * m + i] = 0.0; 57 icols[j * m + i] = (j) ? icols[(j - 1) * m + i] : 0; /* handle case where row is EMPTY */ 58 } 59 } 60 PetscCall(PetscInfo(A, "Percentage of 0's introduced for vectorized multiply %g. Rmax= %" PetscInt_FMT "\n", 1.0 - ((double)a->nz) / PetscMax((double)rmax * m, 1), rmax)); 61 PetscFunctionReturn(PETSC_SUCCESS); 62 } 63 64 static PetscErrorCode MatAssemblyEnd_SeqAIJCRL(Mat A, MatAssemblyType mode) 65 { 66 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 67 68 PetscFunctionBegin; 69 a->inode.use = PETSC_FALSE; 70 71 PetscCall(MatAssemblyEnd_SeqAIJ(A, mode)); 72 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS); 73 74 /* Now calculate the permutation and grouping information. */ 75 PetscCall(MatSeqAIJCRL_create_aijcrl(A)); 76 PetscFunctionReturn(PETSC_SUCCESS); 77 } 78 79 #include <../src/mat/impls/aij/seq/crl/ftn-kernels/fmultcrl.h> 80 81 /* 82 Shared by both sequential and parallel versions of CRL matrix: MATMPIAIJCRL and MATSEQAIJCRL 83 - the scatter is used only in the parallel version 84 85 */ 86 PetscErrorCode MatMult_AIJCRL(Mat A, Vec xx, Vec yy) 87 { 88 Mat_AIJCRL *aijcrl = (Mat_AIJCRL *)A->spptr; 89 PetscInt m = aijcrl->m; /* Number of rows in the matrix. */ 90 PetscInt rmax = aijcrl->rmax, *icols = aijcrl->icols; 91 PetscScalar *acols = aijcrl->acols; 92 PetscScalar *y; 93 const PetscScalar *x; 94 #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL) 95 PetscInt i, j, ii; 96 #endif 97 98 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 99 #pragma disjoint(*x, *y, *aa) 100 #endif 101 102 PetscFunctionBegin; 103 if (aijcrl->xscat) { 104 PetscCall(VecCopy(xx, aijcrl->xwork)); 105 /* get remote values needed for local part of multiply */ 106 PetscCall(VecScatterBegin(aijcrl->xscat, xx, aijcrl->fwork, INSERT_VALUES, SCATTER_FORWARD)); 107 PetscCall(VecScatterEnd(aijcrl->xscat, xx, aijcrl->fwork, INSERT_VALUES, SCATTER_FORWARD)); 108 xx = aijcrl->xwork; 109 } 110 111 PetscCall(VecGetArrayRead(xx, &x)); 112 PetscCall(VecGetArray(yy, &y)); 113 114 #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL) 115 fortranmultcrl_(&m, &rmax, x, y, icols, acols); 116 #else 117 118 /* first column */ 119 for (j = 0; j < m; j++) y[j] = acols[j] * x[icols[j]]; 120 121 /* other columns */ 122 #if defined(PETSC_HAVE_CRAY_VECTOR) 123 #pragma _CRI preferstream 124 #endif 125 for (i = 1; i < rmax; i++) { 126 ii = i * m; 127 #if defined(PETSC_HAVE_CRAY_VECTOR) 128 #pragma _CRI prefervector 129 #endif 130 for (j = 0; j < m; j++) y[j] = y[j] + acols[ii + j] * x[icols[ii + j]]; 131 } 132 #endif 133 PetscCall(PetscLogFlops(2.0 * aijcrl->nz - m)); 134 PetscCall(VecRestoreArrayRead(xx, &x)); 135 PetscCall(VecRestoreArray(yy, &y)); 136 PetscFunctionReturn(PETSC_SUCCESS); 137 } 138 139 /* MatConvert_SeqAIJ_SeqAIJCRL converts a SeqAIJ matrix into a 140 * SeqAIJCRL matrix. This routine is called by the MatCreate_SeqAIJCRL() 141 * routine, but can also be used to convert an assembled SeqAIJ matrix 142 * into a SeqAIJCRL one. */ 143 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJCRL(Mat A, MatType type, MatReuse reuse, Mat *newmat) 144 { 145 Mat B = *newmat; 146 Mat_AIJCRL *aijcrl; 147 PetscBool sametype; 148 149 PetscFunctionBegin; 150 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 151 PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype)); 152 if (sametype) PetscFunctionReturn(PETSC_SUCCESS); 153 154 PetscCall(PetscNew(&aijcrl)); 155 B->spptr = (void *)aijcrl; 156 157 /* Set function pointers for methods that we inherit from AIJ but override. */ 158 B->ops->duplicate = MatDuplicate_AIJCRL; 159 B->ops->assemblyend = MatAssemblyEnd_SeqAIJCRL; 160 B->ops->destroy = MatDestroy_SeqAIJCRL; 161 B->ops->mult = MatMult_AIJCRL; 162 163 /* If A has already been assembled, compute the permutation. */ 164 if (A->assembled) PetscCall(MatSeqAIJCRL_create_aijcrl(B)); 165 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJCRL)); 166 *newmat = B; 167 PetscFunctionReturn(PETSC_SUCCESS); 168 } 169 170 /*@C 171 MatCreateSeqAIJCRL - Creates a sparse matrix of type `MATSEQAIJCRL`. 172 173 Collective 174 175 Input Parameters: 176 + comm - MPI communicator, set to `PETSC_COMM_SELF` 177 . m - number of rows 178 . n - number of columns 179 . nz - number of nonzeros per row (same for all rows), ignored if `nnz` is given 180 - nnz - array containing the number of nonzeros in the various rows 181 (possibly different for each row) or `NULL` 182 183 Output Parameter: 184 . A - the matrix 185 186 Level: intermediate 187 188 Notes: 189 This type inherits from `MATSEQAIJ`, but stores some additional information that is used to 190 allow better vectorization of the matrix-vector product. At the cost of increased storage, 191 the `MATSEQAIJ` formatted matrix can be copied to a format in which pieces of the matrix are 192 stored in ELLPACK format, allowing the vectorized matrix multiply routine to use stride-1 193 memory accesses. 194 195 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateMPIAIJPERM()`, `MatSetValues()` 196 @*/ 197 PetscErrorCode MatCreateSeqAIJCRL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 198 { 199 PetscFunctionBegin; 200 PetscCall(MatCreate(comm, A)); 201 PetscCall(MatSetSizes(*A, m, n, m, n)); 202 PetscCall(MatSetType(*A, MATSEQAIJCRL)); 203 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz)); 204 PetscFunctionReturn(PETSC_SUCCESS); 205 } 206 207 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJCRL(Mat A) 208 { 209 PetscFunctionBegin; 210 PetscCall(MatSetType(A, MATSEQAIJ)); 211 PetscCall(MatConvert_SeqAIJ_SeqAIJCRL(A, MATSEQAIJCRL, MAT_INPLACE_MATRIX, &A)); 212 PetscFunctionReturn(PETSC_SUCCESS); 213 } 214