1 2 static char help[] = "Tests MATSEQDENSECUDA\n\n"; 3 4 #include <petscmat.h> 5 6 int main(int argc, char **argv) 7 { 8 Mat A, AC, B; 9 PetscInt m = 10, n = 10; 10 PetscReal r, tol = 10 * PETSC_SMALL; 11 12 PetscFunctionBeginUser; 13 PetscCall(PetscInitialize(&argc, &argv, (char *)0, help)); 14 PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL)); 15 PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL)); 16 PetscCall(MatCreate(PETSC_COMM_SELF, &A)); 17 PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, m, n)); 18 PetscCall(MatSetType(A, MATSEQDENSE)); 19 PetscCall(MatSetFromOptions(A)); 20 PetscCall(MatSeqDenseSetPreallocation(A, NULL)); 21 PetscCall(MatSetRandom(A, NULL)); 22 #if 0 23 PetscInt i,j; 24 PetscScalar val; 25 for (i=0; i<m; i++) { 26 for (j=0; j<n; j++) { 27 val = (PetscScalar)(i+j); 28 PetscCall(MatSetValues(A,1,&i,1,&j,&val,INSERT_VALUES)); 29 } 30 } 31 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 32 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 33 #endif 34 35 /* Create a CUDA version of A */ 36 #if defined(PETSC_HAVE_CUDA) 37 PetscCall(MatConvert(A, MATSEQDENSECUDA, MAT_INITIAL_MATRIX, &AC)); 38 #else 39 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &AC)); 40 #endif 41 PetscCall(MatDuplicate(AC, MAT_COPY_VALUES, &B)); 42 43 /* full CUDA AXPY */ 44 PetscCall(MatAXPY(B, -1.0, AC, SAME_NONZERO_PATTERN)); 45 PetscCall(MatNorm(B, NORM_INFINITY, &r)); 46 PetscCheck(r == 0.0, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Error MatDuplicate + MatCopy + MatAXPY %g", (double)r); 47 48 /* test Copy */ 49 PetscCall(MatCopy(AC, B, SAME_NONZERO_PATTERN)); 50 51 /* call MatAXPY_Basic since B is CUDA, A is CPU, */ 52 PetscCall(MatAXPY(B, -1.0, A, SAME_NONZERO_PATTERN)); 53 PetscCall(MatNorm(B, NORM_INFINITY, &r)); 54 PetscCheck(r == 0.0, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Error MatDuplicate + MatCopy + MatAXPY_Basic %g", (double)r); 55 56 if (m == n) { 57 Mat B1, B2; 58 59 PetscCall(MatCopy(AC, B, SAME_NONZERO_PATTERN)); 60 /* full CUDA PtAP */ 61 PetscCall(MatPtAP(B, AC, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B1)); 62 63 /* CPU PtAP since A is on the CPU only */ 64 PetscCall(MatPtAP(B, A, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B2)); 65 66 PetscCall(MatAXPY(B2, -1.0, B1, SAME_NONZERO_PATTERN)); 67 PetscCall(MatNorm(B2, NORM_INFINITY, &r)); 68 PetscCheck(r <= tol, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Error MatPtAP %g", (double)r); 69 70 /* test reuse */ 71 PetscCall(MatPtAP(B, AC, MAT_REUSE_MATRIX, PETSC_DEFAULT, &B1)); 72 PetscCall(MatPtAP(B, A, MAT_REUSE_MATRIX, PETSC_DEFAULT, &B2)); 73 PetscCall(MatAXPY(B2, -1.0, B1, SAME_NONZERO_PATTERN)); 74 PetscCall(MatNorm(B2, NORM_INFINITY, &r)); 75 PetscCheck(r <= tol, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Error MatPtAP %g", (double)r); 76 77 PetscCall(MatDestroy(&B1)); 78 PetscCall(MatDestroy(&B2)); 79 } 80 81 PetscCall(MatDestroy(&B)); 82 PetscCall(MatDestroy(&AC)); 83 PetscCall(MatDestroy(&A)); 84 PetscCall(PetscFinalize()); 85 return 0; 86 } 87 88 /*TEST 89 90 build: 91 requires: cuda 92 93 test: 94 output_file: output/ex32_1.out 95 args: -m {{3 5 12}} -n {{3 5 12}} 96 suffix: seqdensecuda 97 98 TEST*/ 99