/*
  Defines the basic matrix operations for the AIJ (compressed row)
  matrix storage format.
*/
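
/*
   An illustration of the AIJ/CSR layout used throughout this file (the fields
   a->i, a->j, a->a of Mat_SeqAIJ): row r occupies entries a->i[r] .. a->i[r+1]-1
   of the column-index array a->j and the value array a->a. For example, the matrix

       [ 1 0 2 ]
       [ 0 3 0 ]
       [ 4 0 5 ]

   is stored as i = {0, 2, 3, 5}, j = {0, 2, 1, 0, 2}, a = {1, 2, 3, 4, 5}.
*/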

#include <petscconf.h>
#define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1
#include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/
#include <petscbt.h>
#include <../src/vec/vec/impls/dvecimpl.h>
#include <petsc/private/vecimpl.h>

#include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h>

#include <algorithm>
#include <vector>
#include <string>

#include "viennacl/linalg/prod.hpp"

PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat);
PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat, MatFactorType, Mat *);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat);
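
/*
   Copies the matrix data from host (CPU) to device (GPU) when the host copy
   is newer. A->offloadmask tracks which copy is current:

     PETSC_OFFLOAD_UNALLOCATED - no device copy has been allocated yet
     PETSC_OFFLOAD_CPU         - only the host copy is up to date
     PETSC_OFFLOAD_GPU         - only the device copy is up to date
     PETSC_OFFLOAD_BOTH        - host and device copies agree

   On a successful copy the mask is set to PETSC_OFFLOAD_BOTH.
*/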
PetscErrorCode MatViennaCLCopyToGPU(Mat A)
{
  Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
  Mat_SeqAIJ         *a              = (Mat_SeqAIJ *)A->data;

  PetscFunctionBegin;
  if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { // some OpenCL SDKs have issues with buffers of size 0
    if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) {
      PetscCall(PetscLogEventBegin(MAT_ViennaCLCopyToGPU, A, 0, 0, 0));

      try {
        if (a->compressedrow.use) {
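          /* Compressed row storage keeps only the rows that contain nonzeros:
             compressedrow.i holds the row starts of those rows and
             compressedrow.rindex[k] is the original row number of compressed row k. */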
          if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix();

          // Since PetscInt is different from cl_uint, we have to convert:
          viennacl::backend::mem_handle dummy;

          viennacl::backend::typesafe_host_array<unsigned int> row_buffer;
          row_buffer.raw_resize(dummy, a->compressedrow.nrows + 1);
          for (PetscInt i = 0; i <= a->compressedrow.nrows; ++i) row_buffer.set(i, (a->compressedrow.i)[i]);

          viennacl::backend::typesafe_host_array<unsigned int> row_indices;
          row_indices.raw_resize(dummy, a->compressedrow.nrows);
          for (PetscInt i = 0; i < a->compressedrow.nrows; ++i) row_indices.set(i, (a->compressedrow.rindex)[i]);

          viennacl::backend::typesafe_host_array<unsigned int> col_buffer;
          col_buffer.raw_resize(dummy, a->nz);
          for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]);

          viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz);
          PetscCall(PetscLogCpuToGpu(((2 * a->compressedrow.nrows) + 1 + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar)));
        } else {
          if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix();

          // Since PetscInt is in general different from cl_uint, we have to convert:
          viennacl::backend::mem_handle dummy;

          viennacl::backend::typesafe_host_array<unsigned int> row_buffer;
          row_buffer.raw_resize(dummy, A->rmap->n + 1);
          for (PetscInt i = 0; i <= A->rmap->n; ++i) row_buffer.set(i, (a->i)[i]);

          viennacl::backend::typesafe_host_array<unsigned int> col_buffer;
          col_buffer.raw_resize(dummy, a->nz);
          for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]);

          viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
          PetscCall(PetscLogCpuToGpu(((A->rmap->n + 1) + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar)));
        }
        ViennaCLWaitForGPU();
      } catch (std::exception const &ex) {
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
      }

      // Create temporary vector for v += A*x:
      if (viennaclstruct->tempvec) {
        if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) {
          delete (ViennaCLVector *)viennaclstruct->tempvec;
          viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
        } else {
          viennaclstruct->tempvec->clear();
        }
      } else {
        viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
      }

      A->offloadmask = PETSC_OFFLOAD_BOTH;

      PetscCall(PetscLogEventEnd(MAT_ViennaCLCopyToGPU, A, 0, 0, 0));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu)
{
  Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
  Mat_SeqAIJ         *a              = (Mat_SeqAIJ *)A->data;
  PetscInt            m              = A->rmap->n;

  PetscFunctionBegin;
  if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(PETSC_SUCCESS);
  if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) {
    try {
      PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
      PetscCheck((PetscInt)Agpu->size1() == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "GPU matrix has %lu rows, should be %" PetscInt_FMT, Agpu->size1(), m);
      a->nz           = Agpu->nnz();
      a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
      A->preallocated = PETSC_TRUE;
      if (a->free_a) PetscCall(PetscShmgetDeallocateArray((void **)&a->a));
      if (a->free_ij) PetscCall(PetscShmgetDeallocateArray((void **)&a->j));
      if (a->free_ij) PetscCall(PetscShmgetDeallocateArray((void **)&a->i));
      PetscCall(PetscShmgetAllocateArray(a->nz, sizeof(PetscScalar), (void **)&a->a));
      PetscCall(PetscShmgetAllocateArray(a->nz, sizeof(PetscInt), (void **)&a->j));
      PetscCall(PetscShmgetAllocateArray(m + 1, sizeof(PetscInt), (void **)&a->i));
      a->free_a  = PETSC_TRUE;
      a->free_ij = PETSC_TRUE;

      /* Setup row lengths */
      PetscCall(PetscFree(a->imax));
      PetscCall(PetscFree(a->ilen));
      PetscCall(PetscMalloc1(m, &a->imax));
      PetscCall(PetscMalloc1(m, &a->ilen));

      /* Copy data back from GPU */
      viennacl::backend::typesafe_host_array<unsigned int> row_buffer;
      row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);

      // copy row array
      viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
      (a->i)[0] = row_buffer[0];
      for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
        (a->i)[i + 1] = row_buffer[i + 1];
        a->imax[i] = a->ilen[i] = a->i[i + 1] - a->i[i]; // Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
      }

      // copy column indices
      viennacl::backend::typesafe_host_array<unsigned int> col_buffer;
      col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
      viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
      for (PetscInt i = 0; i < (PetscInt)Agpu->nnz(); ++i) (a->j)[i] = col_buffer[i];

      // copy nonzero entries directly to destination (no conversion required)
      viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a);

      PetscCall(PetscLogGpuToCpu(row_buffer.raw_size() + col_buffer.raw_size() + (Agpu->nnz() * sizeof(PetscScalar))));
      ViennaCLWaitForGPU();
    } catch (std::exception const &ex) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
    }
  } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) {
    PetscFunctionReturn(PETSC_SUCCESS);
  } else {
    if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(PETSC_SUCCESS);

    PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
    if (!Agpu) {
      viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar) * viennaclstruct->mat->nnz(), a->a);
    } else {
      viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a);
    }
  }
  A->offloadmask = PETSC_OFFLOAD_BOTH;
  /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMult_SeqAIJViennaCL(Mat A, Vec xx, Vec yy)
{
  Mat_SeqAIJ           *a              = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
  const ViennaCLVector *xgpu           = NULL;
  ViennaCLVector       *ygpu           = NULL;

  PetscFunctionBegin;
  /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
  PetscCall(MatViennaCLCopyToGPU(A));
  if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
    PetscCall(VecViennaCLGetArrayRead(xx, &xgpu));
    PetscCall(VecViennaCLGetArrayWrite(yy, &ygpu));
    PetscCall(PetscLogGpuTimeBegin());
    try {
      if (a->compressedrow.use) {
        *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
      } else {
        *ygpu = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
      }
      ViennaCLWaitForGPU();
    } catch (std::exception const &ex) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
    }
    PetscCall(PetscLogGpuTimeEnd());
    PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu));
    PetscCall(VecViennaCLRestoreArrayWrite(yy, &ygpu));
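    /* Each stored entry contributes one multiply and one add, except that the
       first entry of each nonzero row needs no add: 2*nz - nonzerorowcnt flops. */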
    PetscCall(PetscLogGpuFlops(2.0 * a->nz - a->nonzerorowcnt));
  } else {
    PetscCall(VecSet_SeqViennaCL(yy, 0));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqAIJ           *a              = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
  const ViennaCLVector *xgpu = NULL, *ygpu = NULL;
  ViennaCLVector       *zgpu = NULL;

  PetscFunctionBegin;
  /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
  PetscCall(MatViennaCLCopyToGPU(A));
  if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
    try {
      PetscCall(VecViennaCLGetArrayRead(xx, &xgpu));
      PetscCall(VecViennaCLGetArrayRead(yy, &ygpu));
      PetscCall(VecViennaCLGetArrayWrite(zz, &zgpu));
      PetscCall(PetscLogGpuTimeBegin());
      if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
      else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
      if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec;
      else *zgpu += *viennaclstruct->tempvec;
      ViennaCLWaitForGPU();
      PetscCall(PetscLogGpuTimeEnd());
      PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu));
      PetscCall(VecViennaCLRestoreArrayRead(yy, &ygpu));
      PetscCall(VecViennaCLRestoreArrayWrite(zz, &zgpu));
    } catch (std::exception const &ex) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
    }
    PetscCall(PetscLogGpuFlops(2.0 * a->nz));
  } else {
    PetscCall(VecCopy_SeqViennaCL(yy, zz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A, MatAssemblyType mode)
{
  PetscFunctionBegin;
  PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
  if (!A->boundtocpu) PetscCall(MatViennaCLCopyToGPU(A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
   MatCreateSeqAIJViennaCL - Creates a sparse matrix in `MATSEQAIJVIENNACL` (compressed row) format
   for use on a single process. This matrix will ultimately be pushed down
   to GPUs and use the ViennaCL library for calculations.

   Collective

   Input Parameters:
+  comm - MPI communicator, set to `PETSC_COMM_SELF`
.  m    - number of rows
.  n    - number of columns
.  nz   - number of nonzeros per row (same for all rows), ignored if `nnz` is set
-  nnz  - array containing the number of nonzeros in the various rows (possibly different for each row) or `NULL`

   Output Parameter:
.  A - the matrix

   Notes:
   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

   The AIJ format, also called compressed row storage, is fully compatible with standard
   Fortran storage. That is, the stored row and column indices can begin at either one
   (as in Fortran) or zero.

   Specify the preallocated storage with either `nz` or `nnz` (not both).
   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
   allocation. A small usage sketch follows the definition below.

   Level: intermediate

.seealso: `MATSEQAIJVIENNACL`, `MatCreate()`, `MatCreateAIJ()`, `MatCreateAIJCUSPARSE()`, `MatSetValues()`, `MatSeqAIJSetColumnIndices()`, `MatCreateSeqAIJWithArrays()`
@*/
PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, m, n));
  PetscCall(MatSetType(*A, MATSEQAIJVIENNACL));
  PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, (PetscInt *)nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
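
/*
   A minimal usage sketch (assumes a ViennaCL-enabled PETSc build and a caller
   that checks errors with PetscCall(); the sizes and values are illustrative):

     Mat A;
     PetscCall(MatCreateSeqAIJViennaCL(PETSC_COMM_SELF, 10, 10, 3, NULL, &A));
     PetscCall(MatSetValue(A, 0, 0, 1.0, INSERT_VALUES));
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); // triggers the copy to the GPU
     PetscCall(MatDestroy(&A));
*/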

static PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A)
{
  Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL *)A->spptr;

  PetscFunctionBegin;
  try {
    if (viennaclcontainer) {
      delete viennaclcontainer->tempvec;
      delete viennaclcontainer->mat;
      delete viennaclcontainer->compressed_mat;
      delete viennaclcontainer;
    }
    A->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  } catch (std::exception const &ex) {
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
  }

  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", NULL));

  /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
  A->spptr = NULL;
  PetscCall(MatDestroy_SeqAIJ(A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B)
{
  PetscFunctionBegin;
  PetscCall(MatCreate_SeqAIJ(B));
  PetscCall(MatConvert_SeqAIJ_SeqAIJViennaCL(B, MATSEQAIJVIENNACL, MAT_INPLACE_MATRIX, &B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat, PetscBool);
static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A, MatDuplicateOption cpvalues, Mat *B)
{
  Mat C;

  PetscFunctionBegin;
  PetscCall(MatDuplicate_SeqAIJ(A, cpvalues, B));
  C = *B;

  PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE));
  C->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;

  C->spptr = new Mat_SeqAIJViennaCL();
  ((Mat_SeqAIJViennaCL *)C->spptr)->tempvec        = NULL;
  ((Mat_SeqAIJViennaCL *)C->spptr)->mat            = NULL;
  ((Mat_SeqAIJViennaCL *)C->spptr)->compressed_mat = NULL;

  PetscCall(PetscObjectChangeTypeName((PetscObject)C, MATSEQAIJVIENNACL));

  C->offloadmask = PETSC_OFFLOAD_UNALLOCATED;

  /* If the source matrix is already assembled, copy the destination matrix to the GPU */
  if (C->assembled) PetscCall(MatViennaCLCopyToGPU(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A, PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL));
  *array = ((Mat_SeqAIJ *)A->data)->a;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A, PetscScalar *array[])
{
  PetscFunctionBegin;
  A->offloadmask = PETSC_OFFLOAD_CPU;
  *array         = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSeqAIJGetArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL));
  *array = ((Mat_SeqAIJ *)A->data)->a;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSeqAIJRestoreArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[])
{
  PetscFunctionBegin;
  *array = NULL;
  /* No A->offloadmask = PETSC_OFFLOAD_CPU since if A->offloadmask was PETSC_OFFLOAD_BOTH, it is still BOTH */
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSeqAIJGetArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[])
{
  PetscFunctionBegin;
  *array = ((Mat_SeqAIJ *)A->data)->a;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[])
{
  PetscFunctionBegin;
  A->offloadmask = PETSC_OFFLOAD_CPU;
  *array         = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat A, PetscBool flg)
{
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;

  PetscFunctionBegin;
  A->boundtocpu = flg;
  if (flg && a->inode.size_csr) {
    a->inode.use = PETSC_TRUE;
  } else {
    a->inode.use = PETSC_FALSE;
  }
  if (flg) {
    /* make sure we have an up-to-date copy on the CPU */
    PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL));
    A->ops->mult        = MatMult_SeqAIJ;
    A->ops->multadd     = MatMultAdd_SeqAIJ;
    A->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
    A->ops->duplicate   = MatDuplicate_SeqAIJ;
    PetscCall(PetscMemzero(a->ops, sizeof(Mat_SeqAIJOps)));
  } else {
    A->ops->mult        = MatMult_SeqAIJViennaCL;
    A->ops->multadd     = MatMultAdd_SeqAIJViennaCL;
    A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL;
    A->ops->destroy     = MatDestroy_SeqAIJViennaCL;
    A->ops->duplicate   = MatDuplicate_SeqAIJViennaCL;

    a->ops->getarray          = MatSeqAIJGetArray_SeqAIJViennaCL;
    a->ops->restorearray      = MatSeqAIJRestoreArray_SeqAIJViennaCL;
    a->ops->getarrayread      = MatSeqAIJGetArrayRead_SeqAIJViennaCL;
    a->ops->restorearrayread  = MatSeqAIJRestoreArrayRead_SeqAIJViennaCL;
    a->ops->getarraywrite     = MatSeqAIJGetArrayWrite_SeqAIJViennaCL;
    a->ops->restorearraywrite = MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
{
  Mat B;

  PetscFunctionBegin;
  PetscCheck(reuse != MAT_REUSE_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead");

  if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, newmat));

  B = *newmat;

  B->spptr = new Mat_SeqAIJViennaCL();

  ((Mat_SeqAIJViennaCL *)B->spptr)->tempvec        = NULL;
  ((Mat_SeqAIJViennaCL *)B->spptr)->mat            = NULL;
  ((Mat_SeqAIJViennaCL *)B->spptr)->compressed_mat = NULL;

  PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE));
  A->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;

  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJVIENNACL));
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECVIENNACL, &B->defaultvectype));

  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", MatConvert_SeqAIJ_SeqAIJViennaCL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", MatProductSetFromOptions_SeqAIJ));

  B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;

  /* If the source matrix is already assembled, copy the destination matrix to the GPU */
  if (B->assembled) PetscCall(MatViennaCLCopyToGPU(B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.

   A matrix type whose data resides on GPUs. These matrices are in CSR format by
   default. All matrix calculations are performed using the ViennaCL library.

   Options Database Keys:
+  -mat_type aijviennacl - sets the matrix type to `MATSEQAIJVIENNACL` during a call to `MatSetFromOptions()`
.  -mat_viennacl_storage_format csr - sets the storage format of matrices during a call to `MatSetFromOptions()`
-  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices used in `MatMult()` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: `MatCreateSeqAIJViennaCL()`, `MATAIJVIENNACL`, `MatCreateAIJViennaCL()`
M*/
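
/*
   A minimal sketch of selecting this type at runtime through the options
   database (standard PETSc calls; run the program with -mat_type aijviennacl):

     Mat A;
     PetscCall(MatCreate(PETSC_COMM_SELF, &A));
     PetscCall(MatSetSizes(A, 10, 10, 10, 10));
     PetscCall(MatSetFromOptions(A)); // applies -mat_type aijviennacl if given
     PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
*/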

PETSC_INTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU, MatGetFactor_seqaij_petsc));
  PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY, MatGetFactor_seqaij_petsc));
  PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU, MatGetFactor_seqaij_petsc));
  PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC, MatGetFactor_seqaij_petsc));
  PetscFunctionReturn(PETSC_SUCCESS);
}