xref: /petsc/src/mat/impls/aij/seq/seqcusparse/aijcusparse.cu (revision 9ae82921df069a58776bfe4da82b38e8ff7dd41c)
1 /*
2     Defines the basic matrix operations for the AIJ (compressed row)
3   matrix storage format.
4 */
5 
6 #include "petscconf.h"
7 PETSC_CUDA_EXTERN_C_BEGIN
8 #include "../src/mat/impls/aij/seq/aij.h"          /*I "petscmat.h" I*/
9 //#include "petscbt.h"
10 #include "../src/vec/vec/impls/dvecimpl.h"
11 #include "petsc-private/vecimpl.h"
12 PETSC_CUDA_EXTERN_C_END
13 #undef VecType
14 #include "cusparsematimpl.h"
15 
16 
17 EXTERN_C_BEGIN
18 PetscErrorCode MatILUFactorSymbolic_SeqAIJCUSPARSE(Mat,Mat,IS,IS,const MatFactorInfo*);
19 PetscErrorCode MatLUFactorSymbolic_SeqAIJCUSPARSE(Mat,Mat,IS,IS,const MatFactorInfo*);
20 PetscErrorCode MatLUFactorNumeric_SeqAIJCUSPARSE(Mat,Mat,const MatFactorInfo *);
21 PetscErrorCode MatSolve_SeqAIJCUSPARSE(Mat,Vec,Vec);
22 PetscErrorCode MatSolve_SeqAIJCUSPARSE_NaturalOrdering(Mat,Vec,Vec);
23 PetscErrorCode MatSetFromOptions_SeqAIJCUSPARSE(Mat);
24 PetscErrorCode MatCUSPARSEAnalysisAndCopyToGPU(Mat);
25 EXTERN_C_END
26 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatFactorGetSolverPackage_seqaij_cusparse"
/*
   Query function composed on CUSPARSE factor matrices: reports which
   solver package produced the factorization.
*/
PetscErrorCode MatFactorGetSolverPackage_seqaij_cusparse(Mat A,const MatSolverPackage *type)
{
  PetscFunctionBegin;
  /* this matrix type always belongs to the CUSPARSE solver package */
  *type = MATSOLVERCUSPARSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END
37 
38 EXTERN_C_BEGIN
39 extern PetscErrorCode MatGetFactor_seqaij_petsc(Mat,MatFactorType,Mat*);
40 EXTERN_C_END
41 
42 
43 
44 /*MC
45   MATSOLVERCUSPARSE = "cusparse" - A matrix type providing triangular solvers (ILU) for seq matrices
46   on the GPU of type, seqaijcusparse, aijcusparse, or seqaijcusp, aijcusp
47 
48   ./configure --download-txpetscgpu to install PETSc to use CUSPARSE solves
49 
50   Options Database Keys:
+ -mat_mult_cusparse_storage_format <CSR>         - (choose one of) CSR, ELL
- -mat_solve_cusparse_storage_format <CSR>        - (choose one of) CSR, ELL
53 
54    Level: beginner
55 M*/
56 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetFactor_seqaij_cusparse"
/*
   Creates a factor matrix whose triangular solves run on the GPU through
   CUSPARSE.  The factor itself is first created by the native PETSc AIJ
   routine and then retyped/configured for the GPU.  Only LU, ILU, and
   ILUDT factorizations are supported.
*/
PetscErrorCode MatGetFactor_seqaij_cusparse(Mat A,MatFactorType ftype,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* build the host-side factor matrix first */
  ierr = MatGetFactor_seqaij_petsc(A,ftype,B);CHKERRQ(ierr);
  /* guard: reject any factorization kind CUSPARSE cannot handle */
  if (ftype != MAT_FACTOR_LU && ftype != MAT_FACTOR_ILU && ftype != MAT_FACTOR_ILUDT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported for CUSPARSE Matrix Types");
  ierr = MatSetType(*B,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
  ierr = MatSetFromOptions_SeqAIJCUSPARSE(*B);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*B),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_seqaij_cusparse",MatFactorGetSolverPackage_seqaij_cusparse);CHKERRQ(ierr);
  /* symbolic stages are overridden so the numeric stage can hook in the GPU solves */
  (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJCUSPARSE;
  (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJCUSPARSE;
  (*B)->factortype = ftype;
  PetscFunctionReturn(0);
}
EXTERN_C_END
77 
#undef __FUNCT__
#define __FUNCT__ "MatAIJCUSPARSESetGPUStorageFormatForMatMult"
/*
   Records the GPU storage format (CSR or ELL) to be used for sparse
   matrix-vector products with this matrix.  The new format takes effect
   the next time the matrix is built on the GPU.
*/
PetscErrorCode MatAIJCUSPARSESetGPUStorageFormatForMatMult(Mat A,const GPUStorageFormat format)
{
  Mat_SeqAIJCUSPARSE *cusparseMat = (Mat_SeqAIJCUSPARSE*)A->spptr;

  PetscFunctionBegin;
  cusparseMat->format = format;
  PetscFunctionReturn(0);
}
87 
#undef __FUNCT__
#define __FUNCT__ "MatAIJCUSPARSESetGPUStorageFormatForMatSolve"
/*
   Records the GPU storage format (CSR or ELL) to be used for the
   triangular solves with this factored matrix.  If both triangular
   factors already exist on the GPU they are destroyed and rebuilt in the
   newly requested format.
*/
PetscErrorCode MatAIJCUSPARSESetGPUStorageFormatForMatSolve(Mat A,const GPUStorageFormat format)
{
  PetscErrorCode ierr;
  Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr;
  GPU_Matrix_Ifc *cusparseMatLo = (GPU_Matrix_Ifc*)cusparseTriFactors->loTriFactorPtr;
  GPU_Matrix_Ifc *cusparseMatUp = (GPU_Matrix_Ifc*)cusparseTriFactors->upTriFactorPtr;

  PetscFunctionBegin;
  cusparseTriFactors->format = format;
  if (cusparseMatLo && cusparseMatUp) {
    // Factors already exist in the old format: delete them ...
    delete cusparseMatLo;
    delete cusparseMatUp;
    // ... and null the container slots so no dangling pointers survive
    // should the rebuild below fail partway.
    cusparseTriFactors->loTriFactorPtr = 0;
    cusparseTriFactors->upTriFactorPtr = 0;

    // key data was deleted so reset the flag.
    A->valid_GPU_matrix = PETSC_CUSP_UNALLOCATED;

    // rebuild the factors in the new format
    ierr = MatCUSPARSEAnalysisAndCopyToGPU(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
113 
114 
#undef __FUNCT__
#define __FUNCT__ "MatSetFromOptions_SeqAIJCUSPARSE"
/*
   Processes the command-line options selecting the GPU storage format:
   -mat_mult_cusparse_storage_format for ordinary (SpMV) matrices and
   -mat_solve_cusparse_storage_format for factored (TriSolve) matrices.
*/
PetscErrorCode MatSetFromOptions_SeqAIJCUSPARSE(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       idx = 0;                  /* index 0 (CSR) is the default */
  const char     *formats[] = {CSR,ELL};
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"When using TxPETSCGPU, AIJCUSPARSE Options","Mat");CHKERRQ(ierr);
  if (A->factortype == MAT_FACTOR_NONE) {
    /* ordinary matrix: format governs the mat-vec product */
    ierr = PetscOptionsEList("-mat_mult_cusparse_storage_format","Set the storage format of (seq)aijcusparse gpu matrices for SpMV","None",formats,2,formats[0],&idx,&flg);CHKERRQ(ierr);
    ierr = MatAIJCUSPARSESetGPUStorageFormatForMatMult(A,formats[idx]);CHKERRQ(ierr);
  } else {
    /* factored matrix: format governs the triangular solves */
    ierr = PetscOptionsEList("-mat_solve_cusparse_storage_format","Set the storage format of (seq)aijcusparse gpu matrices for TriSolve","None",formats,2,formats[0],&idx,&flg);CHKERRQ(ierr);
    ierr = MatAIJCUSPARSESetGPUStorageFormatForMatSolve(A,formats[idx]);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
141 
#undef __FUNCT__
#define __FUNCT__ "MatILUFactorSymbolic_SeqAIJCUSPARSE"
/*
   Symbolic ILU factorization: delegates entirely to the CPU AIJ routine,
   then redirects the numeric stage to the CUSPARSE-aware implementation
   so the triangular factors end up on the GPU.
*/
PetscErrorCode MatILUFactorSymbolic_SeqAIJCUSPARSE(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatILUFactorSymbolic_SeqAIJ(B,A,isrow,iscol,info);CHKERRQ(ierr);
  /* the numeric stage will also build/copy the GPU triangular factors */
  B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJCUSPARSE;
  PetscFunctionReturn(0);
}
153 
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorSymbolic_SeqAIJCUSPARSE"
/*
   Symbolic LU factorization: delegates entirely to the CPU AIJ routine,
   then redirects the numeric stage to the CUSPARSE-aware implementation
   so the triangular factors end up on the GPU.
*/
PetscErrorCode MatLUFactorSymbolic_SeqAIJCUSPARSE(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatLUFactorSymbolic_SeqAIJ(B,A,isrow,iscol,info);CHKERRQ(ierr);
  /* the numeric stage will also build/copy the GPU triangular factors */
  B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJCUSPARSE;
  PetscFunctionReturn(0);
}
165 
#undef __FUNCT__
#define __FUNCT__ "MatCUSPARSEBuildLowerTriMatrix"
/*
   Builds the unit lower triangular factor on the GPU from the host
   factored SeqAIJ matrix and runs the CUSPARSE solve analysis on it.

   The CSR arrays for the factor are first assembled in pinned host
   memory (cudaMallocHost), handed to the GPU matrix object, and then
   freed.  An explicit 1.0 diagonal entry is appended to every row, since
   the host factor stores only the strictly lower entries per row.

   Only rebuilds when the GPU copy is absent or stale
   (PETSC_CUSP_UNALLOCATED / PETSC_CUSP_CPU).
*/
PetscErrorCode MatCUSPARSEBuildLowerTriMatrix(Mat A)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  PetscInt          n = A->rmap->n;
  Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors  = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr;
  GPU_Matrix_Ifc* cusparseMat  = (GPU_Matrix_Ifc*)cusparseTriFactors->loTriFactorPtr;
  cusparseStatus_t stat;
  const PetscInt    *ai = a->i,*aj = a->j,*vi;
  const MatScalar   *aa = a->a,*v;
  PetscErrorCode     ierr;
  PetscInt *AiLo, *AjLo;
  PetscScalar *AALo;
  PetscInt i,nz, nzLower, offset, rowOffset;

  PetscFunctionBegin;
  if (A->valid_GPU_matrix == PETSC_CUSP_UNALLOCATED || A->valid_GPU_matrix == PETSC_CUSP_CPU){
    try {
      /* first figure out the number of nonzeros in the lower triangular matrix including 1's on the diagonal. */
      /* ai[n]-ai[1] covers the strictly-lower entries of rows 1..n-1 (row 0 is
         assumed to store none -- its only entry is the unit diagonal added here),
         plus n for the explicit diagonal 1's */
      nzLower=n+ai[n]-ai[1];

      /* Allocate Space for the lower triangular matrix (pinned host memory) */
      ierr = cudaMallocHost((void **) &AiLo, (n+1)*sizeof(PetscInt));CHKERRCUSP(ierr);
      ierr = cudaMallocHost((void **) &AjLo, nzLower*sizeof(PetscInt));CHKERRCUSP(ierr);
      ierr = cudaMallocHost((void **) &AALo, nzLower*sizeof(PetscScalar));CHKERRCUSP(ierr);

      /* Fill the lower triangular matrix */
      AiLo[0]=(PetscInt) 0;
      AiLo[n]=nzLower;
      AjLo[0]=(PetscInt) 0;          /* row 0 holds only its unit diagonal */
      AALo[0]=(MatScalar) 1.0;
      v    = aa;
      vi   = aj;
      offset=1;                      /* next free slot in AjLo/AALo */
      rowOffset=1;                   /* running CSR row pointer for AiLo */
      for (i=1; i<n; i++) {
        nz  = ai[i+1] - ai[i];
        // additional 1 for the term on the diagonal
        AiLo[i]=rowOffset;
        rowOffset+=nz+1;

        /* copy the nz strictly-lower entries of row i ... */
        ierr = PetscMemcpy(&(AjLo[offset]), vi, nz*sizeof(PetscInt));CHKERRQ(ierr);
        ierr = PetscMemcpy(&(AALo[offset]), v, nz*sizeof(PetscScalar));CHKERRQ(ierr);

        offset+=nz;
        /* ... then append the explicit unit diagonal entry */
        AjLo[offset]=(PetscInt) i;
        AALo[offset]=(MatScalar) 1.0;
        offset+=1;

        v  += nz;
        vi += nz;
      }
      /* create the GPU matrix in the requested format and analyze it for triangular solves */
      cusparseMat = GPU_Matrix_Factory::getNew(cusparseTriFactors->format);
      stat = cusparseMat->initializeCusparse(MAT_cusparseHandle, CUSPARSE_MATRIX_TYPE_TRIANGULAR, CUSPARSE_FILL_MODE_LOWER);CHKERRCUSP(stat);
      ierr = cusparseMat->setMatrix(n, n, nzLower, AiLo, AjLo, AALo);CHKERRCUSP(ierr);
      stat = cusparseMat->solveAnalysis();CHKERRCUSP(stat);
      ((Mat_SeqAIJCUSPARSETriFactors*)A->spptr)->loTriFactorPtr = cusparseMat;
      // free temporary data
      ierr = cudaFreeHost(AiLo);CHKERRCUSP(ierr);
      ierr = cudaFreeHost(AjLo);CHKERRCUSP(ierr);
      ierr = cudaFreeHost(AALo);CHKERRCUSP(ierr);
    } catch(char* ex) {
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex);
    }
  }
  PetscFunctionReturn(0);
}
234 
#undef __FUNCT__
#define __FUNCT__ "MatCUSPARSEBuildUpperTriMatrix"
/*
   Builds the upper triangular factor on the GPU from the host factored
   SeqAIJ matrix and runs the CUSPARSE solve analysis on it.

   The upper part of the host factor is addressed through a->diag; the
   CSR arrays for the GPU are staged in pinned host memory and assembled
   back-to-front (row n-1 down to row 0), with each row's diagonal entry
   placed first, followed by its off-diagonal entries.

   Only rebuilds when the GPU copy is absent or stale
   (PETSC_CUSP_UNALLOCATED / PETSC_CUSP_CPU).
*/
PetscErrorCode MatCUSPARSEBuildUpperTriMatrix(Mat A)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  PetscInt          n = A->rmap->n;
  Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors  = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr;
  GPU_Matrix_Ifc* cusparseMat  = (GPU_Matrix_Ifc*)cusparseTriFactors->upTriFactorPtr;
  cusparseStatus_t stat;
  const PetscInt    *aj = a->j,*adiag = a->diag,*vi;
  const MatScalar   *aa = a->a,*v;
  PetscInt *AiUp, *AjUp;
  PetscScalar *AAUp;
  PetscInt i,nz, nzUpper, offset;
  PetscErrorCode     ierr;

  PetscFunctionBegin;

  if (A->valid_GPU_matrix == PETSC_CUSP_UNALLOCATED || A->valid_GPU_matrix == PETSC_CUSP_CPU){
    try {
      /* next, figure out the number of nonzeros in the upper triangular matrix. */
      /* (the adiag values decrease with increasing i in this layout, so
         adiag[0]-adiag[n] counts all upper entries including the diagonals) */
      nzUpper = adiag[0]-adiag[n];

      /* Allocate Space for the upper triangular matrix (pinned host memory) */
      ierr = cudaMallocHost((void **) &AiUp, (n+1)*sizeof(PetscInt));CHKERRCUSP(ierr);
      ierr = cudaMallocHost((void **) &AjUp, nzUpper*sizeof(PetscInt));CHKERRCUSP(ierr);
      ierr = cudaMallocHost((void **) &AAUp, nzUpper*sizeof(PetscScalar));CHKERRCUSP(ierr);

      /* Fill the upper triangular matrix, last row first */
      AiUp[0]=(PetscInt) 0;
      AiUp[n]=nzUpper;
      offset = nzUpper;
      for (i=n-1; i>=0; i--){
        /* start of row i's off-diagonal upper entries in the host factor */
        v   = aa + adiag[i+1] + 1;
        vi  = aj + adiag[i+1] + 1;

        // number of elements NOT on the diagonal
        nz = adiag[i] - adiag[i+1]-1;

        // decrement the offset
        offset -= (nz+1);

        // first, set the diagonal elements
        AjUp[offset] = (PetscInt) i;
        AAUp[offset] = 1./v[nz];   /* reciprocal of the value the host factor stores at the diagonal slot */
        AiUp[i] = AiUp[i+1] - (nz+1);

        /* then copy the nz off-diagonal entries of the row */
        ierr = PetscMemcpy(&(AjUp[offset+1]), vi, nz*sizeof(PetscInt));CHKERRQ(ierr);
        ierr = PetscMemcpy(&(AAUp[offset+1]), v, nz*sizeof(PetscScalar));CHKERRQ(ierr);
      }
      /* create the GPU matrix in the requested format and analyze it for triangular solves */
      cusparseMat = GPU_Matrix_Factory::getNew(cusparseTriFactors->format);
      stat = cusparseMat->initializeCusparse(MAT_cusparseHandle, CUSPARSE_MATRIX_TYPE_TRIANGULAR, CUSPARSE_FILL_MODE_UPPER);CHKERRCUSP(stat);
      ierr = cusparseMat->setMatrix(n, n, nzUpper, AiUp, AjUp, AAUp);CHKERRCUSP(ierr);
      stat = cusparseMat->solveAnalysis();CHKERRCUSP(stat);
      ((Mat_SeqAIJCUSPARSETriFactors*)A->spptr)->upTriFactorPtr = cusparseMat;
      // free temporary data
      ierr = cudaFreeHost(AiUp);CHKERRCUSP(ierr);
      ierr = cudaFreeHost(AjUp);CHKERRCUSP(ierr);
      ierr = cudaFreeHost(AAUp);CHKERRCUSP(ierr);
    } catch(char* ex) {
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex);
    }
  }
  PetscFunctionReturn(0);
}
300 
#undef __FUNCT__
#define __FUNCT__ "MatCUSPARSEAnalysisAndCopyToGPU"
/*
   Builds both triangular factors on the GPU, allocates the intermediate
   work vector shared by the two solves, and — when the row/column
   orderings are not the identity — uploads the permutation indices used
   to reorder the right-hand side and solution.

   Fixes vs. previous revision: the __FUNCT__ string was misspelled
   ("AnalysiAnd"), and the unbraced `if (...) ierr = ...;CHKERRCUSP(ierr);`
   left the error check outside the conditional.
*/
PetscErrorCode MatCUSPARSEAnalysisAndCopyToGPU(Mat A)
{
  PetscErrorCode     ierr;
  Mat_SeqAIJ *a=(Mat_SeqAIJ *)A->data;
  Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors  = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr;
  IS               isrow = a->row,iscol = a->icol;
  PetscBool        row_identity,col_identity;
  const PetscInt   *r,*c;
  PetscInt          n = A->rmap->n;

  PetscFunctionBegin;
  ierr = MatCUSPARSEBuildLowerTriMatrix(A);CHKERRQ(ierr);
  ierr = MatCUSPARSEBuildUpperTriMatrix(A);CHKERRQ(ierr);
  /* intermediate vector used between the lower and upper solves */
  cusparseTriFactors->tempvec = new CUSPARRAY;
  cusparseTriFactors->tempvec->resize(n);

  A->valid_GPU_matrix = PETSC_CUSP_BOTH;
  //lower triangular indices (row permutation)
  ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
  ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
  if (!row_identity) {
    /* braces ensure the error check only runs when the call was made */
    ierr = cusparseTriFactors->loTriFactorPtr->setOrdIndices(r, n);CHKERRCUSP(ierr);
  }
  ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);

  //upper triangular indices (column permutation)
  ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr);
  ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
  if (!col_identity) {
    ierr = cusparseTriFactors->upTriFactorPtr->setOrdIndices(c, n);CHKERRCUSP(ierr);
  }
  ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
335 
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_SeqAIJCUSPARSE"
/*
   Numeric LU/ILU factorization.  The factorization itself runs on the
   CPU (MatLUFactorNumeric_SeqAIJ); afterwards the triangular factors are
   analyzed and copied to the GPU, and the MatSolve implementation is
   selected according to whether the row/column orderings are identities.

   Fix vs. previous revision: removed leftover debug output
   (std::cout trace lines and bare printf's) that polluted stdout on
   every factorization.
*/
PetscErrorCode MatLUFactorNumeric_SeqAIJCUSPARSE(Mat B,Mat A,const MatFactorInfo *info)
{
  PetscErrorCode   ierr;
  Mat_SeqAIJ       *b=(Mat_SeqAIJ *)B->data;
  IS               isrow = b->row,iscol = b->col;
  PetscBool        row_identity,col_identity;

  PetscFunctionBegin;
  ierr = MatLUFactorNumeric_SeqAIJ(B,A,info);CHKERRQ(ierr);

  // determine which version of MatSolve needs to be used.
  ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
  if (row_identity && col_identity) B->ops->solve = MatSolve_SeqAIJCUSPARSE_NaturalOrdering;
  else                              B->ops->solve = MatSolve_SeqAIJCUSPARSE;

  // get the triangular factors
  ierr = MatCUSPARSEAnalysisAndCopyToGPU(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
362 
363 
364 
#undef __FUNCT__
#define __FUNCT__ "MatSolve_SeqAIJCUSPARSE"
/*
   Solves A x = b on the GPU using the previously built CUSPARSE
   triangular factors, for factorizations with non-trivial row/column
   orderings: the right-hand side is permuted into x (reorderIn), the
   lower then upper triangular solves run through the shared temporary
   vector, and the result is permuted back in place (reorderOut).

   Logs 2*nz - n flops, the usual count for a forward+backward solve.
*/
PetscErrorCode MatSolve_SeqAIJCUSPARSE(Mat A,Vec bb,Vec xx)
{
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscErrorCode ierr;
  CUSPARRAY      *xGPU, *bGPU;
  cusparseStatus_t stat;
  Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors  = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr;
  GPU_Matrix_Ifc *cusparseMatLo  = (GPU_Matrix_Ifc*)cusparseTriFactors->loTriFactorPtr;
  GPU_Matrix_Ifc *cusparseMatUp  = (GPU_Matrix_Ifc*)cusparseTriFactors->upTriFactorPtr;
  CUSPARRAY * tempGPU = (CUSPARRAY*) cusparseTriFactors->tempvec;

  PetscFunctionBegin;
  // Get the GPU pointers
  ierr = VecCUSPGetArrayWrite(xx,&xGPU);CHKERRQ(ierr);
  ierr = VecCUSPGetArrayRead(bb,&bGPU);CHKERRQ(ierr);

  // solve with reordering: permute b into x, solve L then U, permute back
  ierr = cusparseMatLo->reorderIn(xGPU, bGPU);CHKERRCUSP(ierr);
  stat = cusparseMatLo->solve(xGPU, tempGPU);CHKERRCUSP(stat);
  stat = cusparseMatUp->solve(tempGPU, xGPU);CHKERRCUSP(stat);
  ierr = cusparseMatUp->reorderOut(xGPU);CHKERRCUSP(ierr);

  ierr = VecCUSPRestoreArrayRead(bb,&bGPU);CHKERRQ(ierr);
  ierr = VecCUSPRestoreArrayWrite(xx,&xGPU);CHKERRQ(ierr);
  // block until the asynchronous GPU work has finished before returning
  ierr = WaitForGPU();CHKERRCUSP(ierr);
  ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
395 
396 
397 
398 
#undef __FUNCT__
#define __FUNCT__ "MatSolve_SeqAIJCUSPARSE_NaturalOrdering"
/*
   Solves A x = b on the GPU when both the row and column orderings are
   identities: no permutation of b or x is needed, so the two triangular
   solves run directly through the shared temporary vector.

   Logs 2*nz - n flops, the usual count for a forward+backward solve.
*/
PetscErrorCode MatSolve_SeqAIJCUSPARSE_NaturalOrdering(Mat A,Vec bb,Vec xx)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  PetscErrorCode    ierr;
  CUSPARRAY         *xGPU, *bGPU;
  cusparseStatus_t stat;
  Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors  = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr;
  GPU_Matrix_Ifc *cusparseMatLo  = (GPU_Matrix_Ifc*)cusparseTriFactors->loTriFactorPtr;
  GPU_Matrix_Ifc *cusparseMatUp  = (GPU_Matrix_Ifc*)cusparseTriFactors->upTriFactorPtr;
  CUSPARRAY * tempGPU = (CUSPARRAY*) cusparseTriFactors->tempvec;

  PetscFunctionBegin;
  // Get the GPU pointers
  ierr = VecCUSPGetArrayWrite(xx,&xGPU);CHKERRQ(ierr);
  ierr = VecCUSPGetArrayRead(bb,&bGPU);CHKERRQ(ierr);

  // solve: L then U, no permutations required
  stat = cusparseMatLo->solve(bGPU, tempGPU);CHKERRCUSP(stat);
  stat = cusparseMatUp->solve(tempGPU, xGPU);CHKERRCUSP(stat);

  ierr = VecCUSPRestoreArrayRead(bb,&bGPU);CHKERRQ(ierr);
  ierr = VecCUSPRestoreArrayWrite(xx,&xGPU);CHKERRQ(ierr);
  // block until the asynchronous GPU work has finished before returning
  ierr = WaitForGPU();CHKERRCUSP(ierr);
  ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
427 
#undef __FUNCT__
#define __FUNCT__ "MatCUSPARSECopyToGPU"
/*
   Copies a (non-factored) SeqAIJ matrix to the GPU in the requested
   storage format.  Any previous GPU copy is deleted and rebuilt from
   scratch whenever the host copy is newer (PETSC_CUSP_UNALLOCATED or
   PETSC_CUSP_CPU).

   Rows without nonzeros are squeezed out: if the host matrix does not
   already use compressed-row storage, equivalent row-pointer and
   row-index arrays are built here so the GPU always receives compressed
   rows (relevant for CSR storage).
*/
PetscErrorCode MatCUSPARSECopyToGPU(Mat A)
{

  Mat_SeqAIJCUSPARSE *cusparseMat  = (Mat_SeqAIJCUSPARSE*)A->spptr;
  Mat_SeqAIJ      *a          = (Mat_SeqAIJ*)A->data;
  PetscInt        m           = A->rmap->n,*ii,*ridx;
  PetscErrorCode  ierr;


  PetscFunctionBegin;
  if (A->valid_GPU_matrix == PETSC_CUSP_UNALLOCATED || A->valid_GPU_matrix == PETSC_CUSP_CPU){
    ierr = PetscLogEventBegin(MAT_CUSPARSECopyToGPU,A,0,0,0);CHKERRQ(ierr);
    /*
      It may be possible to reuse nonzero structure with new matrix values but
      for simplicity and insured correctness we delete and build a new matrix on
      the GPU. Likely a very small performance hit.
    */
    if (cusparseMat->mat){
      try {
        delete cusparseMat->mat;
        if (cusparseMat->tempvec)
          delete cusparseMat->tempvec;

      } catch(char* ex) {
        SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex);
      }
    }
    try {
      /* count the rows that have at least one nonzero */
      cusparseMat->nonzerorow=0;
      for (int j = 0; j<m; j++)
        cusparseMat->nonzerorow += ((a->i[j+1]-a->i[j])>0);

      if (a->compressedrow.use) {
        /* host matrix already stores compressed rows: use them directly */
        m    = a->compressedrow.nrows;
        ii   = a->compressedrow.i;
        ridx = a->compressedrow.rindex;
      } else {
        // Forcing compressed row on the GPU ... only relevant for CSR storage
        int k=0;
        ierr = PetscMalloc((cusparseMat->nonzerorow+1)*sizeof(PetscInt), &ii);CHKERRQ(ierr);
        ierr = PetscMalloc((cusparseMat->nonzerorow)*sizeof(PetscInt), &ridx);CHKERRQ(ierr);
        ii[0]=0;
        for (int j = 0; j<m; j++) {
          if ((a->i[j+1]-a->i[j])>0) {
            ii[k] = a->i[j];       /* row pointer of the k-th nonempty row */
            ridx[k]= j;            /* original row index of that row */
            k++;
          }
        }
        ii[cusparseMat->nonzerorow] = a->nz;
        m = cusparseMat->nonzerorow;
      }

      // Build our matrix ... first determine the GPU storage type
      cusparseMat->mat = GPU_Matrix_Factory::getNew(cusparseMat->format);

      // FILL MODE UPPER is irrelevant
      cusparseStatus_t stat = cusparseMat->mat->initializeCusparse(MAT_cusparseHandle, CUSPARSE_MATRIX_TYPE_GENERAL, CUSPARSE_FILL_MODE_UPPER);CHKERRCUSP(stat);

      // Create the streams and events (if desired).
      PetscMPIInt    size;
      ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
      ierr = cusparseMat->mat->buildStreamsAndEvents(size, &theBodyStream);CHKERRCUSP(ierr);

      // lastly, build the matrix
      ierr = cusparseMat->mat->setMatrix(m, A->cmap->n, a->nz, ii, a->j, a->a);CHKERRCUSP(ierr);
      cusparseMat->mat->setCPRowIndices(ridx, m);
	//      }
      if (!a->compressedrow.use) {
        // free data (only the arrays built locally above; the compressed-row
        // arrays in the host matrix are owned by it)
        ierr = PetscFree(ii);CHKERRQ(ierr);
        ierr = PetscFree(ridx);CHKERRQ(ierr);
      }

      /* work vector used by the GPU kernels; sized to the compressed row count */
      cusparseMat->tempvec = new CUSPARRAY;
      cusparseMat->tempvec->resize(m);
    } catch(char* ex) {
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex);
    }
    ierr = WaitForGPU();CHKERRCUSP(ierr);
    A->valid_GPU_matrix = PETSC_CUSP_BOTH;
    ierr = PetscLogEventEnd(MAT_CUSPARSECopyToGPU,A,0,0,0);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
515 
516 // #undef __FUNCT__
517 // #define __FUNCT__ "MatCUSPARSECopyFromGPU"
518 // PetscErrorCode MatCUSPARSECopyFromGPU(Mat A, CUSPMATRIX *Agpu)
519 // {
520 //   Mat_SeqAIJCUSPARSE *cuspstruct = (Mat_SeqAIJCUSPARSE *) A->spptr;
521 //   Mat_SeqAIJ     *a          = (Mat_SeqAIJ *) A->data;
522 //   PetscInt        m          = A->rmap->n;
523 //   PetscErrorCode  ierr;
524 
525 //   PetscFunctionBegin;
526 //   if (A->valid_GPU_matrix == PETSC_CUSP_UNALLOCATED) {
527 //     if (A->valid_GPU_matrix == PETSC_CUSP_UNALLOCATED) {
528 //       try {
529 //         cuspstruct->mat = Agpu;
530 //         if (a->compressedrow.use) {
531 //           //PetscInt *ii, *ridx;
532 //           SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "Cannot handle row compression for GPU matrices");
533 //         } else {
534 //           PetscInt i;
535 
536 //           if (m+1 != (PetscInt) cuspstruct->mat->row_offsets.size()) {SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", cuspstruct->mat->row_offsets.size()-1, m);}
537 //           a->nz    = cuspstruct->mat->values.size();
538 //           a->maxnz = a->nz; /* Since we allocate exactly the right amount */
539 //           A->preallocated = PETSC_TRUE;
540 //           // Copy ai, aj, aa
541 //           if (a->singlemalloc) {
542 //             if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);}
543 //           } else {
544 //             if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);}
545 //             if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);}
546 //             if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);}
547 //           }
548 //           ierr = PetscMalloc3(a->nz,PetscScalar,&a->a,a->nz,PetscInt,&a->j,m+1,PetscInt,&a->i);CHKERRQ(ierr);
549 //           ierr = PetscLogObjectMemory(A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
550 //           a->singlemalloc = PETSC_TRUE;
551 //           thrust::copy(cuspstruct->mat->row_offsets.begin(), cuspstruct->mat->row_offsets.end(), a->i);
552 //           thrust::copy(cuspstruct->mat->column_indices.begin(), cuspstruct->mat->column_indices.end(), a->j);
553 //           thrust::copy(cuspstruct->mat->values.begin(), cuspstruct->mat->values.end(), a->a);
554 //           // Setup row lengths
555 //           if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
556 //           ierr = PetscMalloc2(m,PetscInt,&a->imax,m,PetscInt,&a->ilen);CHKERRQ(ierr);
557 //           ierr = PetscLogObjectMemory(A, 2*m*sizeof(PetscInt));CHKERRQ(ierr);
558 //           for(i = 0; i < m; ++i) {
559 //             a->imax[i] = a->ilen[i] = a->i[i+1] - a->i[i];
560 //           }
561 //           // a->diag?
562 //         }
563 //         cuspstruct->tempvec = new CUSPARRAY;
564 //         cuspstruct->tempvec->resize(m);
565 //       } catch(char *ex) {
566 //         SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "CUSP error: %s", ex);
567 //       }
568 //     }
569 //     // This assembly prevents resetting the flag to PETSC_CUSP_CPU and recopying
570 //     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
571 //     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
572 //     A->valid_GPU_matrix = PETSC_CUSP_BOTH;
573 //   } else {
574 //     SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "Only valid for unallocated GPU matrices");
575 //   }
576 //   PetscFunctionReturn(0);
577 // }
578 
#undef __FUNCT__
#define __FUNCT__ "MatGetVecs_SeqAIJCUSPARSE"
/*
   Creates GPU (VECSEQCUSP) vectors that conform to the matrix layouts,
   so products and solves with this matrix keep their data on the GPU.
   Either output may be PETSC_NULL if not wanted.
*/
PetscErrorCode MatGetVecs_SeqAIJCUSPARSE(Mat mat, Vec *right, Vec *left)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;

  if (right) {
    /* right vector conforms to the column layout */
    ierr = VecCreate(((PetscObject)mat)->comm,right);CHKERRQ(ierr);
    ierr = VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = VecSetBlockSize(*right,mat->rmap->bs);CHKERRQ(ierr);
    ierr = VecSetType(*right,VECSEQCUSP);CHKERRQ(ierr);
    ierr = PetscLayoutReference(mat->cmap,&(*right)->map);CHKERRQ(ierr);
  }
  if (left) {
    /* left vector conforms to the row layout */
    ierr = VecCreate(((PetscObject)mat)->comm,left);CHKERRQ(ierr);
    ierr = VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = VecSetBlockSize(*left,mat->rmap->bs);CHKERRQ(ierr);
    ierr = VecSetType(*left,VECSEQCUSP);CHKERRQ(ierr);
    ierr = PetscLayoutReference(mat->rmap,&(*left)->map);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
603 
#undef __FUNCT__
#define __FUNCT__ "MatMult_SeqAIJCUSPARSE"
/*
   y = A*x performed on the GPU.  The matrix is assumed to be present on
   the GPU already (copied in MatAssemblyEnd_SeqAIJCUSPARSE).

   Fix vs. previous revision: the unbraced
   `if (...) ierr = WaitForGPU();CHKERRCUSP(ierr);` executed the error
   check unconditionally (on a stale ierr when the condition was false);
   braces now keep the check with the call it guards.
*/
PetscErrorCode MatMult_SeqAIJCUSPARSE(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscErrorCode ierr;
  Mat_SeqAIJCUSPARSE *cusparseMat = (Mat_SeqAIJCUSPARSE *)A->spptr;
  CUSPARRAY      *xarray,*yarray;

  PetscFunctionBegin;
  // The copy to the GPU is not needed here; it has been moved to MatAssemblyEnd_SeqAIJCUSPARSE
  ierr = VecCUSPGetArrayRead(xx,&xarray);CHKERRQ(ierr);
  ierr = VecCUSPGetArrayWrite(yy,&yarray);CHKERRQ(ierr);
  try {
    ierr = cusparseMat->mat->multiply(xarray, yarray);CHKERRCUSP(ierr);
  } catch (char* ex) {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex);
  }
  ierr = VecCUSPRestoreArrayRead(xx,&xarray);CHKERRQ(ierr);
  ierr = VecCUSPRestoreArrayWrite(yy,&yarray);CHKERRQ(ierr);
  /* only synchronize when the multiply did not run on its own stream */
  if (!cusparseMat->mat->hasNonZeroStream()) {
    ierr = WaitForGPU();CHKERRCUSP(ierr);
  }
  ierr = PetscLogFlops(2.0*a->nz - cusparseMat->nonzerorow);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
630 
631 
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_SeqAIJCUSPARSE"
/*
   zz = yy + A*xx on the GPU: yy is first copied into zz, then the GPU
   multiply-add accumulates A*xx into zz in place.
*/
PetscErrorCode MatMultAdd_SeqAIJCUSPARSE(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscErrorCode ierr;
  Mat_SeqAIJCUSPARSE *cusparseMat = (Mat_SeqAIJCUSPARSE *)A->spptr;
  CUSPARRAY      *xarray,*yarray,*zarray;
  PetscFunctionBegin;
  // The line below should not be necessary as it has been moved to MatAssemblyEnd_SeqAIJCUSPARSE
  // ierr = MatCUSPARSECopyToGPU(A);CHKERRQ(ierr);
  try {
    /* seed zz with yy so the multiply-add accumulates onto it */
    ierr = VecCopy_SeqCUSP(yy,zz);CHKERRQ(ierr);
    ierr = VecCUSPGetArrayRead(xx,&xarray);CHKERRQ(ierr);
    // NOTE(review): yarray is acquired and released but never referenced in
    // between -- the add goes through the copy already in zz.  Confirm whether
    // this Get/Restore pair is needed for its side effects or is leftover.
    ierr = VecCUSPGetArrayRead(yy,&yarray);CHKERRQ(ierr);
    ierr = VecCUSPGetArrayWrite(zz,&zarray);CHKERRQ(ierr);

    // multiply add
    ierr = cusparseMat->mat->multiplyAdd(xarray, zarray);CHKERRCUSP(ierr);

    ierr = VecCUSPRestoreArrayRead(xx,&xarray);CHKERRQ(ierr);
    ierr = VecCUSPRestoreArrayRead(yy,&yarray);CHKERRQ(ierr);
    ierr = VecCUSPRestoreArrayWrite(zz,&zarray);CHKERRQ(ierr);

  } catch(char* ex) {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex);
  }
  ierr = WaitForGPU();CHKERRCUSP(ierr);
  ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
663 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_SeqAIJCUSPARSE"
/*
   Completes assembly on the CPU, pushes the matrix to the GPU, and — on
   final assembly only — installs the GPU mat-vec kernels.
*/
PetscErrorCode MatAssemblyEnd_SeqAIJCUSPARSE(Mat A,MatAssemblyType mode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
  /* MatCUSPARSECopyToGPU leaves valid_GPU_matrix at PETSC_CUSP_BOTH, so no
     further flag adjustment is needed here */
  ierr = MatCUSPARSECopyToGPU(A);CHKERRQ(ierr);
  if (mode != MAT_FLUSH_ASSEMBLY) {
    A->ops->mult    = MatMult_SeqAIJCUSPARSE;
    A->ops->multadd = MatMultAdd_SeqAIJCUSPARSE;
  }
  PetscFunctionReturn(0);
}
681 
682 /* --------------------------------------------------------------------------------*/
683 #undef __FUNCT__
684 #define __FUNCT__ "MatCreateSeqAIJCUSPARSE"
685 /*@C
686    MatCreateSeqAIJCUSPARSE - Creates a sparse matrix in AIJ (compressed row) format
687    (the default parallel PETSc format).  For good matrix assembly performance
688    the user should preallocate the matrix storage by setting the parameter nz
689    (or the array nnz).  By setting these parameters accurately, performance
690    during matrix assembly can be increased by more than a factor of 50.
691 
692    Collective on MPI_Comm
693 
694    Input Parameters:
695 +  comm - MPI communicator, set to PETSC_COMM_SELF
696 .  m - number of rows
697 .  n - number of columns
698 .  nz - number of nonzeros per row (same for all rows)
699 -  nnz - array containing the number of nonzeros in the various rows
700          (possibly different for each row) or PETSC_NULL
701 
702    Output Parameter:
703 .  A - the matrix
704 
705    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
707    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
708 
709    Notes:
710    If nnz is given then nz is ignored
711 
712    The AIJ format (also called the Yale sparse matrix format or
713    compressed row storage), is fully compatible with standard Fortran 77
714    storage.  That is, the stored row and column indices can begin at
715    either one (as in Fortran) or zero.  See the users' manual for details.
716 
717    Specify the preallocated storage with either nz or nnz (not both).
718    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
719    allocation.  For large problems you MUST preallocate memory or you
720    will get TERRIBLE performance, see the users' manual chapter on matrices.
721 
722    By default, this format uses inodes (identical nodes) when possible, to
723    improve numerical efficiency of matrix-vector products and solves. We
724    search for consecutive rows with the same nonzero structure, thereby
725    reusing matrix information to achieve increased efficiency.
726 
727    Level: intermediate
728 
729 .seealso: MatCreate(), MatCreateAIJ(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ()
730 
731 @*/
PetscErrorCode  MatCreateSeqAIJCUSPARSE(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* create and size the matrix, set the GPU type, then preallocate */
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
743 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SeqAIJCUSPARSE"
/*
   Destroys the GPU-side data — either the mat-vec matrix or the two
   triangular factors, depending on whether this is a factored matrix —
   then falls through to the regular SeqAIJ destroy.
*/
PetscErrorCode MatDestroy_SeqAIJCUSPARSE(Mat A)
{
  PetscErrorCode        ierr;
  Mat_SeqAIJCUSPARSE      *cusparseMat = (Mat_SeqAIJCUSPARSE*)A->spptr;

  PetscFunctionBegin;
  if (A->factortype==MAT_FACTOR_NONE) {
    // The regular matrices
    try {
      /* the GPU matrix only exists once something has been copied up */
      if (A->valid_GPU_matrix != PETSC_CUSP_UNALLOCATED){
	delete (GPU_Matrix_Ifc *)(cusparseMat->mat);
      }
      if (cusparseMat->tempvec!=0)
	delete cusparseMat->tempvec;
      delete cusparseMat;
      A->valid_GPU_matrix = PETSC_CUSP_UNALLOCATED;
    } catch(char* ex) {
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex);
    }
  } else {
    // The triangular factors
    try {
      Mat_SeqAIJCUSPARSETriFactors *cusparseTriFactors  = (Mat_SeqAIJCUSPARSETriFactors*)A->spptr;
      GPU_Matrix_Ifc *cusparseMatLo  = (GPU_Matrix_Ifc*)cusparseTriFactors->loTriFactorPtr;
      GPU_Matrix_Ifc *cusparseMatUp  = (GPU_Matrix_Ifc*)cusparseTriFactors->upTriFactorPtr;
      // destroy the matrix and the container
      delete (GPU_Matrix_Ifc *)cusparseMatLo;
      delete (GPU_Matrix_Ifc *)cusparseMatUp;
      delete (CUSPARRAY*) cusparseTriFactors->tempvec;
      delete cusparseTriFactors;
    } catch(char* ex) {
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSPARSE error: %s", ex);
    }
  }
  // NOTE(review): MAT_cusparseHandle is created once and shared by all
  // aijcusparse matrices (see MatCreate_SeqAIJCUSPARSE) but is destroyed
  // here as soon as ANY one of them is destroyed — confirm no other live
  // matrices still rely on it.
  if (MAT_cusparseHandle) {
    cusparseStatus_t stat;
    stat = cusparseDestroy(MAT_cusparseHandle);CHKERRCUSP(stat);
    MAT_cusparseHandle=0;
  }
  /*this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
  A->spptr = 0;

  ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
791 
792 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_SeqAIJCUSPARSE"
/*
   Type constructor for MATSEQAIJCUSPARSE: builds on MatCreate_SeqAIJ,
   allocates the GPU-side container (plain matrix or triangular-factor
   holder, depending on factortype), creates the shared CUSPARSE handle
   on first use, and installs the GPU-aware operations.
*/
PetscErrorCode  MatCreate_SeqAIJCUSPARSE(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr            = MatCreate_SeqAIJ(B);CHKERRQ(ierr);
  B->ops->mult    = MatMult_SeqAIJCUSPARSE;
  B->ops->multadd = MatMultAdd_SeqAIJCUSPARSE;

  if (B->factortype==MAT_FACTOR_NONE) {
    /* you cannot check the inode.use flag here since the matrix was just created.*/
    /* container for the GPU mat-vec matrix; CSR is the default format */
    B->spptr        = new Mat_SeqAIJCUSPARSE;
    ((Mat_SeqAIJCUSPARSE *)B->spptr)->mat = 0;
    ((Mat_SeqAIJCUSPARSE *)B->spptr)->tempvec = 0;
    ((Mat_SeqAIJCUSPARSE *)B->spptr)->format = CSR;
  } else {
    /* NEXT, set the pointers to the triangular factors */
    B->spptr = new Mat_SeqAIJCUSPARSETriFactors;
    ((Mat_SeqAIJCUSPARSETriFactors *)B->spptr)->loTriFactorPtr = 0;
    ((Mat_SeqAIJCUSPARSETriFactors *)B->spptr)->upTriFactorPtr = 0;
    ((Mat_SeqAIJCUSPARSETriFactors *)B->spptr)->tempvec = 0;
    ((Mat_SeqAIJCUSPARSETriFactors *)B->spptr)->format = CSR;
  }
  // Create a single instance of the MAT_cusparseHandle for any matrix (matMult, TriSolve, ...)
  if (!MAT_cusparseHandle) {
    cusparseStatus_t stat;
    stat = cusparseCreate(&MAT_cusparseHandle);CHKERRCUSP(stat);
  }
  // Here we overload MatGetFactor_petsc_C which enables -mat_type aijcusparse to use the
  // default cusparse tri solve. Note the difference with the implementation in
  // MatCreate_SeqAIJCUSP in ../seqcusp/aijcusp.cu
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C","MatGetFactor_seqaij_cusparse",MatGetFactor_seqaij_cusparse);CHKERRQ(ierr);
  B->ops->assemblyend    = MatAssemblyEnd_SeqAIJCUSPARSE;
  B->ops->destroy        = MatDestroy_SeqAIJCUSPARSE;
  B->ops->getvecs        = MatGetVecs_SeqAIJCUSPARSE;
  B->ops->setfromoptions = MatSetFromOptions_SeqAIJCUSPARSE;
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
  /* nothing has been copied to the GPU yet */
  B->valid_GPU_matrix = PETSC_CUSP_UNALLOCATED;
  PetscFunctionReturn(0);
}
EXTERN_C_END
837 
838