xref: /petsc/src/mat/impls/aij/seq/seqviennacl/aijviennacl.cxx (revision 03047865b8d8757cf1cf9cda45785c1537b01dc1)
1 /*
2     Defines the basic matrix operations for the AIJ (compressed row)
3   matrix storage format.
4 */
5 
6 #include <petscconf.h>
7 #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1
8 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/
9 #include <petscbt.h>
10 #include <../src/vec/vec/impls/dvecimpl.h>
11 #include <petsc/private/vecimpl.h>
12 
13 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h>
14 
15 #include <algorithm>
16 #include <vector>
17 #include <string>
18 
19 #include "viennacl/linalg/prod.hpp"
20 
21 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat);
22 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat, MatFactorType, Mat *);
23 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat);
24 
MatViennaCLCopyToGPU(Mat A)25 PetscErrorCode MatViennaCLCopyToGPU(Mat A)
26 {
27   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
28   Mat_SeqAIJ         *a              = (Mat_SeqAIJ *)A->data;
29 
30   PetscFunctionBegin;
31   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0
32     if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) {
33       PetscCall(PetscLogEventBegin(MAT_ViennaCLCopyToGPU, A, 0, 0, 0));
34 
35       try {
36         if (a->compressedrow.use) {
37           if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix();
38 
39           // Since PetscInt is different from cl_uint, we have to convert:
40           viennacl::backend::mem_handle dummy;
41 
42           viennacl::backend::typesafe_host_array<unsigned int> row_buffer;
43           row_buffer.raw_resize(dummy, a->compressedrow.nrows + 1);
44           for (PetscInt i = 0; i <= a->compressedrow.nrows; ++i) row_buffer.set(i, (a->compressedrow.i)[i]);
45 
46           viennacl::backend::typesafe_host_array<unsigned int> row_indices;
47           row_indices.raw_resize(dummy, a->compressedrow.nrows);
48           for (PetscInt i = 0; i < a->compressedrow.nrows; ++i) row_indices.set(i, (a->compressedrow.rindex)[i]);
49 
50           viennacl::backend::typesafe_host_array<unsigned int> col_buffer;
51           col_buffer.raw_resize(dummy, a->nz);
52           for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]);
53 
54           viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz);
55           PetscCall(PetscLogCpuToGpu(((2 * a->compressedrow.nrows) + 1 + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar)));
56         } else {
57           if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix();
58 
59           // Since PetscInt is in general different from cl_uint, we have to convert:
60           viennacl::backend::mem_handle dummy;
61 
62           viennacl::backend::typesafe_host_array<unsigned int> row_buffer;
63           row_buffer.raw_resize(dummy, A->rmap->n + 1);
64           for (PetscInt i = 0; i <= A->rmap->n; ++i) row_buffer.set(i, (a->i)[i]);
65 
66           viennacl::backend::typesafe_host_array<unsigned int> col_buffer;
67           col_buffer.raw_resize(dummy, a->nz);
68           for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]);
69 
70           viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
71           PetscCall(PetscLogCpuToGpu(((A->rmap->n + 1) + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar)));
72         }
73         ViennaCLWaitForGPU();
74       } catch (std::exception const &ex) {
75         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
76       }
77 
78       // Create temporary vector for v += A*x:
79       if (viennaclstruct->tempvec) {
80         if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) {
81           delete (ViennaCLVector *)viennaclstruct->tempvec;
82           viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
83         } else {
84           viennaclstruct->tempvec->clear();
85         }
86       } else {
87         viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
88       }
89 
90       A->offloadmask = PETSC_OFFLOAD_BOTH;
91 
92       PetscCall(PetscLogEventEnd(MAT_ViennaCLCopyToGPU, A, 0, 0, 0));
93     }
94   }
95   PetscFunctionReturn(PETSC_SUCCESS);
96 }
97 
MatViennaCLCopyFromGPU(Mat A,const ViennaCLAIJMatrix * Agpu)98 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu)
99 {
100   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
101   Mat_SeqAIJ         *a              = (Mat_SeqAIJ *)A->data;
102   PetscInt            m              = A->rmap->n;
103 
104   PetscFunctionBegin;
105   if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(PETSC_SUCCESS);
106   if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) {
107     try {
108       PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
109       PetscCheck((PetscInt)Agpu->size1() == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "GPU matrix has %lu rows, should be %" PetscInt_FMT, Agpu->size1(), m);
110       a->nz           = Agpu->nnz();
111       a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
112       A->preallocated = PETSC_TRUE;
113       if (a->free_a) PetscCall(PetscShmgetDeallocateArray((void **)a->a));
114       if (a->free_ij) PetscCall(PetscShmgetDeallocateArray((void **)a->j));
115       if (a->free_ij) PetscCall(PetscShmgetDeallocateArray((void **)a->i));
116       PetscCall(PetscShmgetAllocateArray(a->nz, sizeof(PetscScalar), (void **)&a->a));
117       PetscCall(PetscShmgetAllocateArray(a->nz, sizeof(PetscInt), (void **)&a->j));
118       PetscCall(PetscShmgetAllocateArray(m + 1, sizeof(PetscInt), (void **)&a->i));
119       a->free_a  = PETSC_TRUE;
120       a->free_ij = PETSC_TRUE;
121 
122       /* Setup row lengths */
123       PetscCall(PetscFree(a->imax));
124       PetscCall(PetscFree(a->ilen));
125       PetscCall(PetscMalloc1(m, &a->imax));
126       PetscCall(PetscMalloc1(m, &a->ilen));
127 
128       /* Copy data back from GPU */
129       viennacl::backend::typesafe_host_array<unsigned int> row_buffer;
130       row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);
131 
132       // copy row array
133       viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
134       (a->i)[0] = row_buffer[0];
135       for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
136         (a->i)[i + 1] = row_buffer[i + 1];
137         a->imax[i] = a->ilen[i] = a->i[i + 1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
138       }
139 
140       // copy column indices
141       viennacl::backend::typesafe_host_array<unsigned int> col_buffer;
142       col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
143       viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
144       for (PetscInt i = 0; i < (PetscInt)Agpu->nnz(); ++i) (a->j)[i] = col_buffer[i];
145 
146       // copy nonzero entries directly to destination (no conversion required)
147       viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a);
148 
149       PetscCall(PetscLogGpuToCpu(row_buffer.raw_size() + col_buffer.raw_size() + (Agpu->nnz() * sizeof(PetscScalar))));
150       ViennaCLWaitForGPU();
151     } catch (std::exception const &ex) {
152       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
153     }
154   } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) {
155     PetscFunctionReturn(PETSC_SUCCESS);
156   } else {
157     if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(PETSC_SUCCESS);
158 
159     PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
160     if (!Agpu) {
161       viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar) * viennaclstruct->mat->nnz(), a->a);
162     } else {
163       viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a);
164     }
165   }
166   A->offloadmask = PETSC_OFFLOAD_BOTH;
167   /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */
168   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
169   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
170   PetscFunctionReturn(PETSC_SUCCESS);
171 }
172 
MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy)173 static PetscErrorCode MatMult_SeqAIJViennaCL(Mat A, Vec xx, Vec yy)
174 {
175   Mat_SeqAIJ           *a              = (Mat_SeqAIJ *)A->data;
176   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
177   const ViennaCLVector *xgpu           = NULL;
178   ViennaCLVector       *ygpu           = NULL;
179 
180   PetscFunctionBegin;
181   /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
182   PetscCall(MatViennaCLCopyToGPU(A));
183   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
184     PetscCall(VecViennaCLGetArrayRead(xx, &xgpu));
185     PetscCall(VecViennaCLGetArrayWrite(yy, &ygpu));
186     PetscCall(PetscLogGpuTimeBegin());
187     try {
188       if (a->compressedrow.use) {
189         *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
190       } else {
191         *ygpu = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
192       }
193       ViennaCLWaitForGPU();
194     } catch (std::exception const &ex) {
195       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
196     }
197     PetscCall(PetscLogGpuTimeEnd());
198     PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu));
199     PetscCall(VecViennaCLRestoreArrayWrite(yy, &ygpu));
200     PetscCall(PetscLogGpuFlops(2.0 * a->nz - a->nonzerorowcnt));
201   } else {
202     PetscCall(VecSet_SeqViennaCL(yy, 0));
203   }
204   PetscFunctionReturn(PETSC_SUCCESS);
205 }
206 
MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz)207 static PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A, Vec xx, Vec yy, Vec zz)
208 {
209   Mat_SeqAIJ           *a              = (Mat_SeqAIJ *)A->data;
210   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
211   const ViennaCLVector *xgpu = NULL, *ygpu = NULL;
212   ViennaCLVector       *zgpu = NULL;
213 
214   PetscFunctionBegin;
215   /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
216   PetscCall(MatViennaCLCopyToGPU(A));
217   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
218     try {
219       PetscCall(VecViennaCLGetArrayRead(xx, &xgpu));
220       PetscCall(VecViennaCLGetArrayRead(yy, &ygpu));
221       PetscCall(VecViennaCLGetArrayWrite(zz, &zgpu));
222       PetscCall(PetscLogGpuTimeBegin());
223       if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
224       else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
225       if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec;
226       else *zgpu += *viennaclstruct->tempvec;
227       ViennaCLWaitForGPU();
228       PetscCall(PetscLogGpuTimeEnd());
229       PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu));
230       PetscCall(VecViennaCLRestoreArrayRead(yy, &ygpu));
231       PetscCall(VecViennaCLRestoreArrayWrite(zz, &zgpu));
232 
233     } catch (std::exception const &ex) {
234       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
235     }
236     PetscCall(PetscLogGpuFlops(2.0 * a->nz));
237   } else {
238     PetscCall(VecCopy_SeqViennaCL(yy, zz));
239   }
240   PetscFunctionReturn(PETSC_SUCCESS);
241 }
242 
MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode)243 static PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A, MatAssemblyType mode)
244 {
245   PetscFunctionBegin;
246   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
247   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
248   if (!A->boundtocpu) PetscCall(MatViennaCLCopyToGPU(A));
249   PetscFunctionReturn(PETSC_SUCCESS);
250 }
251 
252 /*@C
253   MatCreateSeqAIJViennaCL - Creates a sparse matrix in `MATSEQAIJVIENNACL` (compressed row) format
254   (the default parallel PETSc format).  This matrix will ultimately be pushed down
255   to GPUs and use the ViennaCL library for calculations.
256 
257   Collective
258 
259   Input Parameters:
260 + comm - MPI communicator, set to `PETSC_COMM_SELF`
261 . m    - number of rows
262 . n    - number of columns
263 . nz   - number of nonzeros per row (same for all rows), ignored if `nnz` is set
264 - nnz  - array containing the number of nonzeros in the various rows (possibly different for each row) or `NULL`
265 
266   Output Parameter:
267 . A - the matrix
268 
269    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
270    MatXXXXSetPreallocation() paradigm instead of this routine directly.
271    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
272 
273   Notes:
274   The AIJ format, also called
275   compressed row storage, is fully compatible with standard Fortran
276   storage.  That is, the stored row and column indices can begin at
277   either one (as in Fortran) or zero.
278 
279   Specify the preallocated storage with either `nz` or `nnz` (not both).
280   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
281   allocation.
282 
283   Level: intermediate
284 
285 .seealso: `MATSEQAIJVIENNACL`, `MatCreate()`, `MatCreateAIJ()`, `MatCreateAIJCUSPARSE()`, `MatSetValues()`, `MatSeqAIJSetColumnIndices()`, `MatCreateSeqAIJWithArrays()`
286 @*/
MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat * A)287 PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
288 {
289   PetscFunctionBegin;
290   PetscCall(MatCreate(comm, A));
291   PetscCall(MatSetSizes(*A, m, n, m, n));
292   PetscCall(MatSetType(*A, MATSEQAIJVIENNACL));
293   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, (PetscInt *)nnz));
294   PetscFunctionReturn(PETSC_SUCCESS);
295 }
296 
MatDestroy_SeqAIJViennaCL(Mat A)297 static PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A)
298 {
299   Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL *)A->spptr;
300 
301   PetscFunctionBegin;
302   try {
303     if (viennaclcontainer) {
304       delete viennaclcontainer->tempvec;
305       delete viennaclcontainer->mat;
306       delete viennaclcontainer->compressed_mat;
307       delete viennaclcontainer;
308     }
309     A->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
310   } catch (std::exception const &ex) {
311     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
312   }
313 
314   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", NULL));
315   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", NULL));
316   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", NULL));
317 
318   /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
319   A->spptr = 0;
320   PetscCall(MatDestroy_SeqAIJ(A));
321   PetscFunctionReturn(PETSC_SUCCESS);
322 }
323 
MatCreate_SeqAIJViennaCL(Mat B)324 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B)
325 {
326   PetscFunctionBegin;
327   PetscCall(MatCreate_SeqAIJ(B));
328   PetscCall(MatConvert_SeqAIJ_SeqAIJViennaCL(B, MATSEQAIJVIENNACL, MAT_INPLACE_MATRIX, &B));
329   PetscFunctionReturn(PETSC_SUCCESS);
330 }
331 
332 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat, PetscBool);
MatDuplicate_SeqAIJViennaCL(Mat A,MatDuplicateOption cpvalues,Mat * B)333 static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A, MatDuplicateOption cpvalues, Mat *B)
334 {
335   Mat C;
336 
337   PetscFunctionBegin;
338   PetscCall(MatDuplicate_SeqAIJ(A, cpvalues, B));
339   C = *B;
340 
341   PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE));
342   C->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;
343 
344   C->spptr                                         = new Mat_SeqAIJViennaCL();
345   ((Mat_SeqAIJViennaCL *)C->spptr)->tempvec        = NULL;
346   ((Mat_SeqAIJViennaCL *)C->spptr)->mat            = NULL;
347   ((Mat_SeqAIJViennaCL *)C->spptr)->compressed_mat = NULL;
348 
349   PetscCall(PetscObjectChangeTypeName((PetscObject)C, MATSEQAIJVIENNACL));
350 
351   C->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
352 
353   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
354   if (C->assembled) PetscCall(MatViennaCLCopyToGPU(C));
355   PetscFunctionReturn(PETSC_SUCCESS);
356 }
357 
MatSeqAIJGetArray_SeqAIJViennaCL(Mat A,PetscScalar * array[])358 static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A, PetscScalar *array[])
359 {
360   PetscFunctionBegin;
361   PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL));
362   *array = ((Mat_SeqAIJ *)A->data)->a;
363   PetscFunctionReturn(PETSC_SUCCESS);
364 }
365 
MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A,PetscScalar * array[])366 static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A, PetscScalar *array[])
367 {
368   PetscFunctionBegin;
369   A->offloadmask = PETSC_OFFLOAD_CPU;
370   *array         = NULL;
371   PetscFunctionReturn(PETSC_SUCCESS);
372 }
373 
MatSeqAIJGetArrayRead_SeqAIJViennaCL(Mat A,const PetscScalar * array[])374 static PetscErrorCode MatSeqAIJGetArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[])
375 {
376   PetscFunctionBegin;
377   PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL));
378   *array = ((Mat_SeqAIJ *)A->data)->a;
379   PetscFunctionReturn(PETSC_SUCCESS);
380 }
381 
MatSeqAIJRestoreArrayRead_SeqAIJViennaCL(Mat A,const PetscScalar * array[])382 static PetscErrorCode MatSeqAIJRestoreArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[])
383 {
384   PetscFunctionBegin;
385   *array = NULL;
386   /* No A->offloadmask = PETSC_OFFLOAD_CPU since if A->offloadmask was PETSC_OFFLOAD_BOTH, it is still BOTH */
387   PetscFunctionReturn(PETSC_SUCCESS);
388 }
389 
MatSeqAIJGetArrayWrite_SeqAIJViennaCL(Mat A,PetscScalar * array[])390 static PetscErrorCode MatSeqAIJGetArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[])
391 {
392   PetscFunctionBegin;
393   *array = ((Mat_SeqAIJ *)A->data)->a;
394   PetscFunctionReturn(PETSC_SUCCESS);
395 }
396 
MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL(Mat A,PetscScalar * array[])397 static PetscErrorCode MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[])
398 {
399   PetscFunctionBegin;
400   A->offloadmask = PETSC_OFFLOAD_CPU;
401   *array         = NULL;
402   PetscFunctionReturn(PETSC_SUCCESS);
403 }
404 
MatBindToCPU_SeqAIJViennaCL(Mat A,PetscBool flg)405 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat A, PetscBool flg)
406 {
407   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
408 
409   PetscFunctionBegin;
410   A->boundtocpu = flg;
411   if (flg && a->inode.size_csr) {
412     a->inode.use = PETSC_TRUE;
413   } else {
414     a->inode.use = PETSC_FALSE;
415   }
416   if (flg) {
417     /* make sure we have an up-to-date copy on the CPU */
418     PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL));
419     A->ops->mult        = MatMult_SeqAIJ;
420     A->ops->multadd     = MatMultAdd_SeqAIJ;
421     A->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
422     A->ops->duplicate   = MatDuplicate_SeqAIJ;
423     PetscCall(PetscMemzero(a->ops, sizeof(Mat_SeqAIJOps)));
424   } else {
425     A->ops->mult        = MatMult_SeqAIJViennaCL;
426     A->ops->multadd     = MatMultAdd_SeqAIJViennaCL;
427     A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL;
428     A->ops->destroy     = MatDestroy_SeqAIJViennaCL;
429     A->ops->duplicate   = MatDuplicate_SeqAIJViennaCL;
430 
431     a->ops->getarray          = MatSeqAIJGetArray_SeqAIJViennaCL;
432     a->ops->restorearray      = MatSeqAIJRestoreArray_SeqAIJViennaCL;
433     a->ops->getarrayread      = MatSeqAIJGetArrayRead_SeqAIJViennaCL;
434     a->ops->restorearrayread  = MatSeqAIJRestoreArrayRead_SeqAIJViennaCL;
435     a->ops->getarraywrite     = MatSeqAIJGetArrayWrite_SeqAIJViennaCL;
436     a->ops->restorearraywrite = MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL;
437   }
438   PetscFunctionReturn(PETSC_SUCCESS);
439 }
440 
MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A,MatType type,MatReuse reuse,Mat * newmat)441 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
442 {
443   Mat B;
444 
445   PetscFunctionBegin;
446   PetscCheck(reuse != MAT_REUSE_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead");
447 
448   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, newmat));
449 
450   B = *newmat;
451 
452   B->spptr = new Mat_SeqAIJViennaCL();
453 
454   ((Mat_SeqAIJViennaCL *)B->spptr)->tempvec        = NULL;
455   ((Mat_SeqAIJViennaCL *)B->spptr)->mat            = NULL;
456   ((Mat_SeqAIJViennaCL *)B->spptr)->compressed_mat = NULL;
457 
458   PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE));
459   A->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;
460 
461   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJVIENNACL));
462   PetscCall(PetscFree(B->defaultvectype));
463   PetscCall(PetscStrallocpy(VECVIENNACL, &B->defaultvectype));
464 
465   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", MatConvert_SeqAIJ_SeqAIJViennaCL));
466   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense));
467   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", MatProductSetFromOptions_SeqAIJ));
468 
469   B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
470 
471   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
472   if (B->assembled) PetscCall(MatViennaCLCopyToGPU(B));
473   PetscFunctionReturn(PETSC_SUCCESS);
474 }
475 
476 /*MC
477    MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.
478 
479    A matrix type whose data resides on GPUs. These matrices are in CSR format by
480    default. All matrix calculations are performed using the ViennaCL library.
481 
482    Options Database Keys:
483 +  -mat_type aijviennacl - sets the matrix type to `MATSEQAIJVIENNACL` during a call to `MatSetFromOptions()`
484 .  -mat_viennacl_storage_format csr - sets the storage format of matrices for `MatMult()` during a call to `MatSetFromOptions()`.
485 -  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for `MatMult()` during a call to `MatSetFromOptions()`.
486 
487   Level: beginner
488 
489 .seealso: `MatCreateSeqAIJViennaCL()`, `MATAIJVIENNACL`, `MatCreateAIJViennaCL()`
490 M*/
491 
MatSolverTypeRegister_ViennaCL(void)492 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void)
493 {
494   PetscFunctionBegin;
495   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU, MatGetFactor_seqaij_petsc));
496   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY, MatGetFactor_seqaij_petsc));
497   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU, MatGetFactor_seqaij_petsc));
498   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC, MatGetFactor_seqaij_petsc));
499   PetscFunctionReturn(PETSC_SUCCESS);
500 }
501