1c4762a1bSJed Brown static char help[] = "Tests MATSEQDENSECUDA\n\n";
2c4762a1bSJed Brown
3c4762a1bSJed Brown #include <petscmat.h>
4c4762a1bSJed Brown
main(int argc,char ** argv)5d71ae5a4SJacob Faibussowitsch int main(int argc, char **argv)
6d71ae5a4SJacob Faibussowitsch {
7c4762a1bSJed Brown Mat A, AC, B;
8c4762a1bSJed Brown PetscInt m = 10, n = 10;
9c4762a1bSJed Brown PetscReal r, tol = 10 * PETSC_SMALL;
10c4762a1bSJed Brown
11327415f7SBarry Smith PetscFunctionBeginUser;
12c8025a54SPierre Jolivet PetscCall(PetscInitialize(&argc, &argv, NULL, help));
139566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
149566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
159566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &A));
169566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, m, n));
179566063dSJacob Faibussowitsch PetscCall(MatSetType(A, MATSEQDENSE));
189566063dSJacob Faibussowitsch PetscCall(MatSetFromOptions(A));
199566063dSJacob Faibussowitsch PetscCall(MatSeqDenseSetPreallocation(A, NULL));
209566063dSJacob Faibussowitsch PetscCall(MatSetRandom(A, NULL));
21c20d7725SJed Brown #if 0
22c20d7725SJed Brown PetscInt i,j;
23c20d7725SJed Brown PetscScalar val;
24c20d7725SJed Brown for (i=0; i<m; i++) {
25c20d7725SJed Brown for (j=0; j<n; j++) {
26c20d7725SJed Brown val = (PetscScalar)(i+j);
279566063dSJacob Faibussowitsch PetscCall(MatSetValues(A,1,&i,1,&j,&val,INSERT_VALUES));
28c20d7725SJed Brown }
29c20d7725SJed Brown }
309566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
319566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
32c20d7725SJed Brown #endif
33c4762a1bSJed Brown
34c4762a1bSJed Brown /* Create a CUDA version of A */
35c20d7725SJed Brown #if defined(PETSC_HAVE_CUDA)
369566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQDENSECUDA, MAT_INITIAL_MATRIX, &AC));
37c20d7725SJed Brown #else
389566063dSJacob Faibussowitsch PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &AC));
39c20d7725SJed Brown #endif
409566063dSJacob Faibussowitsch PetscCall(MatDuplicate(AC, MAT_COPY_VALUES, &B));
41c4762a1bSJed Brown
42c4762a1bSJed Brown /* full CUDA AXPY */
439566063dSJacob Faibussowitsch PetscCall(MatAXPY(B, -1.0, AC, SAME_NONZERO_PATTERN));
449566063dSJacob Faibussowitsch PetscCall(MatNorm(B, NORM_INFINITY, &r));
4508401ef6SPierre Jolivet PetscCheck(r == 0.0, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Error MatDuplicate + MatCopy + MatAXPY %g", (double)r);
46c4762a1bSJed Brown
47c4762a1bSJed Brown /* test Copy */
489566063dSJacob Faibussowitsch PetscCall(MatCopy(AC, B, SAME_NONZERO_PATTERN));
49c4762a1bSJed Brown
50c4762a1bSJed Brown /* call MatAXPY_Basic since B is CUDA, A is CPU, */
519566063dSJacob Faibussowitsch PetscCall(MatAXPY(B, -1.0, A, SAME_NONZERO_PATTERN));
529566063dSJacob Faibussowitsch PetscCall(MatNorm(B, NORM_INFINITY, &r));
5308401ef6SPierre Jolivet PetscCheck(r == 0.0, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Error MatDuplicate + MatCopy + MatAXPY_Basic %g", (double)r);
54c4762a1bSJed Brown
55c4762a1bSJed Brown if (m == n) {
56c4762a1bSJed Brown Mat B1, B2;
57c4762a1bSJed Brown
589566063dSJacob Faibussowitsch PetscCall(MatCopy(AC, B, SAME_NONZERO_PATTERN));
59c4762a1bSJed Brown /* full CUDA PtAP */
60fb842aefSJose E. Roman PetscCall(MatPtAP(B, AC, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B1));
61c20d7725SJed Brown
62c4762a1bSJed Brown /* CPU PtAP since A is on the CPU only */
63fb842aefSJose E. Roman PetscCall(MatPtAP(B, A, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B2));
64c20d7725SJed Brown
659566063dSJacob Faibussowitsch PetscCall(MatAXPY(B2, -1.0, B1, SAME_NONZERO_PATTERN));
669566063dSJacob Faibussowitsch PetscCall(MatNorm(B2, NORM_INFINITY, &r));
6708401ef6SPierre Jolivet PetscCheck(r <= tol, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Error MatPtAP %g", (double)r);
68c4762a1bSJed Brown
69c4762a1bSJed Brown /* test reuse */
70fb842aefSJose E. Roman PetscCall(MatPtAP(B, AC, MAT_REUSE_MATRIX, PETSC_DETERMINE, &B1));
71fb842aefSJose E. Roman PetscCall(MatPtAP(B, A, MAT_REUSE_MATRIX, PETSC_DETERMINE, &B2));
729566063dSJacob Faibussowitsch PetscCall(MatAXPY(B2, -1.0, B1, SAME_NONZERO_PATTERN));
739566063dSJacob Faibussowitsch PetscCall(MatNorm(B2, NORM_INFINITY, &r));
7408401ef6SPierre Jolivet PetscCheck(r <= tol, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Error MatPtAP %g", (double)r);
75c4762a1bSJed Brown
769566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B1));
779566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B2));
78c4762a1bSJed Brown }
79c4762a1bSJed Brown
809566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B));
819566063dSJacob Faibussowitsch PetscCall(MatDestroy(&AC));
829566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A));
839566063dSJacob Faibussowitsch PetscCall(PetscFinalize());
84b122ec5aSJacob Faibussowitsch return 0;
85c4762a1bSJed Brown }
86c4762a1bSJed Brown
87c4762a1bSJed Brown /*TEST
88c4762a1bSJed Brown
89c4762a1bSJed Brown build:
90c4762a1bSJed Brown requires: cuda
91c4762a1bSJed Brown
92c4762a1bSJed Brown test:
93*3886731fSPierre Jolivet output_file: output/empty.out
94c4762a1bSJed Brown args: -m {{3 5 12}} -n {{3 5 12}}
95c4762a1bSJed Brown suffix: seqdensecuda
96c4762a1bSJed Brown
97c4762a1bSJed Brown TEST*/
98