1 /*
2 Defines a matrix-vector product for the MATSEQAIJCRL matrix class.
3 This class is derived from the MATSEQAIJ class and retains the
4 compressed row storage (aka Yale sparse matrix format) but augments
5 it with a column oriented storage that is more efficient for
6 matrix vector products on Vector machines.
7
8 CRL stands for constant row length (that is the same number of columns
9 is kept (padded with zeros) for each row of the sparse matrix.
10 */
11 #include <../src/mat/impls/aij/seq/crl/crl.h>
12
MatDestroy_SeqAIJCRL(Mat A)13 static PetscErrorCode MatDestroy_SeqAIJCRL(Mat A)
14 {
15 Mat_AIJCRL *aijcrl = (Mat_AIJCRL *)A->spptr;
16
17 PetscFunctionBegin;
18 /* Free everything in the Mat_AIJCRL data structure. */
19 if (aijcrl) PetscCall(PetscFree2(aijcrl->acols, aijcrl->icols));
20 PetscCall(PetscFree(A->spptr));
21 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
22 PetscCall(MatDestroy_SeqAIJ(A));
23 PetscFunctionReturn(PETSC_SUCCESS);
24 }
25
MatDuplicate_AIJCRL(Mat A,MatDuplicateOption op,Mat * M)26 PetscErrorCode MatDuplicate_AIJCRL(Mat A, MatDuplicateOption op, Mat *M)
27 {
28 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot duplicate AIJCRL matrices yet");
29 }
30
MatSeqAIJCRL_create_aijcrl(Mat A)31 static PetscErrorCode MatSeqAIJCRL_create_aijcrl(Mat A)
32 {
33 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
34 Mat_AIJCRL *aijcrl = (Mat_AIJCRL *)A->spptr;
35 PetscInt m = A->rmap->n; /* Number of rows in the matrix. */
36 PetscInt *aj = a->j; /* From the CSR representation; points to the beginning of each row. */
37 PetscInt i, j, rmax = a->rmax, *icols, *ilen = a->ilen;
38 MatScalar *aa = a->a;
39 PetscScalar *acols;
40
41 PetscFunctionBegin;
42 aijcrl->nz = a->nz;
43 aijcrl->m = A->rmap->n;
44 aijcrl->rmax = rmax;
45
46 PetscCall(PetscFree2(aijcrl->acols, aijcrl->icols));
47 PetscCall(PetscMalloc2(rmax * m, &aijcrl->acols, rmax * m, &aijcrl->icols));
48 acols = aijcrl->acols;
49 icols = aijcrl->icols;
50 for (i = 0; i < m; i++) {
51 for (j = 0; j < ilen[i]; j++) {
52 acols[j * m + i] = *aa++;
53 icols[j * m + i] = *aj++;
54 }
55 for (; j < rmax; j++) { /* empty column entries */
56 acols[j * m + i] = 0.0;
57 icols[j * m + i] = (j) ? icols[(j - 1) * m + i] : 0; /* handle case where row is EMPTY */
58 }
59 }
60 PetscCall(PetscInfo(A, "Percentage of 0's introduced for vectorized multiply %g. Rmax= %" PetscInt_FMT "\n", 1.0 - ((double)a->nz) / PetscMax((double)rmax * m, 1), rmax));
61 PetscFunctionReturn(PETSC_SUCCESS);
62 }
63
MatAssemblyEnd_SeqAIJCRL(Mat A,MatAssemblyType mode)64 static PetscErrorCode MatAssemblyEnd_SeqAIJCRL(Mat A, MatAssemblyType mode)
65 {
66 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
67
68 PetscFunctionBegin;
69 a->inode.use = PETSC_FALSE;
70
71 PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
72 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
73
74 /* Now calculate the permutation and grouping information. */
75 PetscCall(MatSeqAIJCRL_create_aijcrl(A));
76 PetscFunctionReturn(PETSC_SUCCESS);
77 }
78
79 #include <../src/mat/impls/aij/seq/crl/ftn-kernels/fmultcrl.h>
80
81 /*
82 Shared by both sequential and parallel versions of CRL matrix: MATMPIAIJCRL and MATSEQAIJCRL
83 - the scatter is used only in the parallel version
84
85 */
MatMult_AIJCRL(Mat A,Vec xx,Vec yy)86 PetscErrorCode MatMult_AIJCRL(Mat A, Vec xx, Vec yy)
87 {
88 Mat_AIJCRL *aijcrl = (Mat_AIJCRL *)A->spptr;
89 PetscInt m = aijcrl->m; /* Number of rows in the matrix. */
90 PetscInt rmax = aijcrl->rmax, *icols = aijcrl->icols;
91 PetscScalar *acols = aijcrl->acols;
92 PetscScalar *y;
93 const PetscScalar *x;
94 #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
95 PetscInt i, j, ii;
96 #endif
97
98 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
99 #pragma disjoint(*x, *y, *aa)
100 #endif
101
102 PetscFunctionBegin;
103 if (aijcrl->xscat) {
104 PetscCall(VecCopy(xx, aijcrl->xwork));
105 /* get remote values needed for local part of multiply */
106 PetscCall(VecScatterBegin(aijcrl->xscat, xx, aijcrl->fwork, INSERT_VALUES, SCATTER_FORWARD));
107 PetscCall(VecScatterEnd(aijcrl->xscat, xx, aijcrl->fwork, INSERT_VALUES, SCATTER_FORWARD));
108 xx = aijcrl->xwork;
109 }
110
111 PetscCall(VecGetArrayRead(xx, &x));
112 PetscCall(VecGetArray(yy, &y));
113
114 #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
115 fortranmultcrl_(&m, &rmax, x, y, icols, acols);
116 #else
117
118 /* first column */
119 for (j = 0; j < m; j++) y[j] = acols[j] * x[icols[j]];
120
121 /* other columns */
122 #if defined(PETSC_HAVE_CRAY_VECTOR)
123 #pragma _CRI preferstream
124 #endif
125 for (i = 1; i < rmax; i++) {
126 ii = i * m;
127 #if defined(PETSC_HAVE_CRAY_VECTOR)
128 #pragma _CRI prefervector
129 #endif
130 for (j = 0; j < m; j++) y[j] = y[j] + acols[ii + j] * x[icols[ii + j]];
131 }
132 #endif
133 PetscCall(PetscLogFlops(2.0 * aijcrl->nz - m));
134 PetscCall(VecRestoreArrayRead(xx, &x));
135 PetscCall(VecRestoreArray(yy, &y));
136 PetscFunctionReturn(PETSC_SUCCESS);
137 }
138
139 /* MatConvert_SeqAIJ_SeqAIJCRL converts a SeqAIJ matrix into a
140 * SeqAIJCRL matrix. This routine is called by the MatCreate_SeqAIJCRL()
141 * routine, but can also be used to convert an assembled SeqAIJ matrix
142 * into a SeqAIJCRL one. */
MatConvert_SeqAIJ_SeqAIJCRL(Mat A,MatType type,MatReuse reuse,Mat * newmat)143 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJCRL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
144 {
145 Mat B = *newmat;
146 Mat_AIJCRL *aijcrl;
147 PetscBool sametype;
148
149 PetscFunctionBegin;
150 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
151 PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
152 if (sametype) PetscFunctionReturn(PETSC_SUCCESS);
153
154 PetscCall(PetscNew(&aijcrl));
155 B->spptr = (void *)aijcrl;
156
157 /* Set function pointers for methods that we inherit from AIJ but override. */
158 B->ops->duplicate = MatDuplicate_AIJCRL;
159 B->ops->assemblyend = MatAssemblyEnd_SeqAIJCRL;
160 B->ops->destroy = MatDestroy_SeqAIJCRL;
161 B->ops->mult = MatMult_AIJCRL;
162
163 /* If A has already been assembled, compute the permutation. */
164 if (A->assembled) PetscCall(MatSeqAIJCRL_create_aijcrl(B));
165 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJCRL));
166 *newmat = B;
167 PetscFunctionReturn(PETSC_SUCCESS);
168 }
169
170 /*@C
171 MatCreateSeqAIJCRL - Creates a sparse matrix of type `MATSEQAIJCRL`.
172
173 Collective
174
175 Input Parameters:
176 + comm - MPI communicator, set to `PETSC_COMM_SELF`
177 . m - number of rows
178 . n - number of columns
179 . nz - number of nonzeros per row (same for all rows), ignored if `nnz` is given
180 - nnz - array containing the number of nonzeros in the various rows
181 (possibly different for each row) or `NULL`
182
183 Output Parameter:
184 . A - the matrix
185
186 Level: intermediate
187
188 Notes:
189 This type inherits from `MATSEQAIJ`, but stores some additional information that is used to
190 allow better vectorization of the matrix-vector product. At the cost of increased storage,
191 the `MATSEQAIJ` formatted matrix can be copied to a format in which pieces of the matrix are
192 stored in ELLPACK format, allowing the vectorized matrix multiply routine to use stride-1
193 memory accesses.
194
195 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateMPIAIJPERM()`, `MatSetValues()`
196 @*/
MatCreateSeqAIJCRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat * A)197 PetscErrorCode MatCreateSeqAIJCRL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
198 {
199 PetscFunctionBegin;
200 PetscCall(MatCreate(comm, A));
201 PetscCall(MatSetSizes(*A, m, n, m, n));
202 PetscCall(MatSetType(*A, MATSEQAIJCRL));
203 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz));
204 PetscFunctionReturn(PETSC_SUCCESS);
205 }
206
MatCreate_SeqAIJCRL(Mat A)207 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJCRL(Mat A)
208 {
209 PetscFunctionBegin;
210 PetscCall(MatSetType(A, MATSEQAIJ));
211 PetscCall(MatConvert_SeqAIJ_SeqAIJCRL(A, MATSEQAIJCRL, MAT_INPLACE_MATRIX, &A));
212 PetscFunctionReturn(PETSC_SUCCESS);
213 }
214