static char help[] = "Tests MatMult(), MatMultAdd(), MatMultTranspose().\n\ Also MatMultTransposeAdd(), MatScale(), MatGetDiagonal(), MatDiagonalScale(), MatZeroEntries() and MatDuplicate().\n\n"; #include int main(int argc, char **args) { Mat C; Vec s, u, w, x, y, z; PetscInt i, j, m = 8, n, rstart, rend, vstart, vend; PetscScalar one = 1.0, negone = -1.0, v, alpha = 0.1; PetscReal norm, tol = PETSC_SQRT_MACHINE_EPSILON; PetscBool flg; PetscFunctionBeginUser; PetscCall(PetscInitialize(&argc, &args, NULL, help)); PetscCall(PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD, PETSC_VIEWER_ASCII_COMMON)); PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL)); n = m; PetscCall(PetscOptionsHasName(NULL, NULL, "-rectA", &flg)); if (flg) n += 2; PetscCall(PetscOptionsHasName(NULL, NULL, "-rectB", &flg)); if (flg) n -= 2; /* ---------- Assemble matrix and vectors ----------- */ PetscCall(MatCreate(PETSC_COMM_WORLD, &C)); PetscCall(MatSetSizes(C, PETSC_DECIDE, PETSC_DECIDE, m, n)); PetscCall(MatSetFromOptions(C)); PetscCall(MatSetUp(C)); PetscCall(MatGetOwnershipRange(C, &rstart, &rend)); PetscCall(VecCreate(PETSC_COMM_WORLD, &x)); PetscCall(VecSetSizes(x, PETSC_DECIDE, m)); PetscCall(VecSetFromOptions(x)); PetscCall(VecDuplicate(x, &z)); PetscCall(VecDuplicate(x, &w)); PetscCall(VecCreate(PETSC_COMM_WORLD, &y)); PetscCall(VecSetSizes(y, PETSC_DECIDE, n)); PetscCall(VecSetFromOptions(y)); PetscCall(VecDuplicate(y, &u)); PetscCall(VecDuplicate(y, &s)); PetscCall(VecGetOwnershipRange(y, &vstart, &vend)); /* Assembly */ for (i = rstart; i < rend; i++) { v = 100 * (i + 1); PetscCall(VecSetValues(z, 1, &i, &v, INSERT_VALUES)); for (j = 0; j < n; j++) { v = 10 * (i + 1) + j + 1; PetscCall(MatSetValues(C, 1, &i, 1, &j, &v, INSERT_VALUES)); } } /* Flush off proc Vec values and do more assembly */ PetscCall(VecAssemblyBegin(z)); for (i = vstart; i < vend; i++) { v = one * ((PetscReal)i); PetscCall(VecSetValues(y, 1, &i, &v, INSERT_VALUES)); v = 100.0 * i; PetscCall(VecSetValues(u, 1, &i, &v, INSERT_VALUES)); } /* Flush off proc Mat values and do more assembly */ PetscCall(MatAssemblyBegin(C, MAT_FLUSH_ASSEMBLY)); for (i = rstart; i < rend; i++) { for (j = 0; j < n; j++) { v = 10 * (i + 1) + j + 1; PetscCall(MatSetValues(C, 1, &i, 1, &j, &v, INSERT_VALUES)); } } /* Try overlap Coomunication with the next stage XXXSetValues */ PetscCall(VecAssemblyEnd(z)); PetscCall(MatAssemblyEnd(C, MAT_FLUSH_ASSEMBLY)); CHKMEMQ; /* The Assembly for the second Stage */ PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); PetscCall(VecAssemblyBegin(y)); PetscCall(VecAssemblyEnd(y)); PetscCall(MatScale(C, alpha)); PetscCall(VecAssemblyBegin(u)); PetscCall(VecAssemblyEnd(u)); PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatMult()\n")); CHKMEMQ; PetscCall(MatMult(C, y, x)); CHKMEMQ; PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD)); PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatMultAdd()\n")); PetscCall(MatMultAdd(C, y, z, w)); PetscCall(VecAXPY(x, one, z)); PetscCall(VecAXPY(x, negone, w)); PetscCall(VecNorm(x, NORM_2, &norm)); if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error difference = %g\n", (double)norm)); /* ------- Test MatMultTranspose(), MatMultTransposeAdd() ------- */ for (i = rstart; i < rend; i++) { v = one * ((PetscReal)i); PetscCall(VecSetValues(x, 1, &i, &v, INSERT_VALUES)); } PetscCall(VecAssemblyBegin(x)); PetscCall(VecAssemblyEnd(x)); PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatMultTranspose()\n")); PetscCall(MatMultTranspose(C, x, y)); PetscCall(VecView(y, PETSC_VIEWER_STDOUT_WORLD)); PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatMultTransposeAdd()\n")); PetscCall(MatMultTransposeAdd(C, x, u, s)); PetscCall(VecAXPY(y, one, u)); PetscCall(VecAXPY(y, negone, s)); PetscCall(VecNorm(y, NORM_2, &norm)); if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error difference = %g\n", (double)norm)); /* -------------------- Test MatGetDiagonal() ------------------ */ PetscCall(PetscPrintf(PETSC_COMM_WORLD, "testing MatGetDiagonal(), MatDiagonalScale()\n")); PetscCall(MatView(C, PETSC_VIEWER_STDOUT_WORLD)); PetscCall(VecSet(x, one)); PetscCall(MatGetDiagonal(C, x)); PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD)); for (i = vstart; i < vend; i++) { v = one * ((PetscReal)(i + 1)); PetscCall(VecSetValues(y, 1, &i, &v, INSERT_VALUES)); } /* -------------------- Test () MatDiagonalScale ------------------ */ PetscCall(PetscOptionsHasName(NULL, NULL, "-test_diagonalscale", &flg)); if (flg) { PetscCall(MatDiagonalScale(C, x, y)); PetscCall(MatView(C, PETSC_VIEWER_STDOUT_WORLD)); } /* -------------------- Test () MatZeroEntries() and MatDuplicate() ------------------ */ PetscCall(PetscOptionsHasName(NULL, NULL, "-test_zeroentries", &flg)); if (flg) { Mat D; PetscCall(MatDuplicate(C, MAT_COPY_VALUES, &D)); PetscCall(MatZeroEntries(D)); PetscCall(MatView(D, PETSC_VIEWER_STDOUT_WORLD)); PetscCall(MatDestroy(&D)); } /* Free data structures */ PetscCall(VecDestroy(&u)); PetscCall(VecDestroy(&s)); PetscCall(VecDestroy(&w)); PetscCall(VecDestroy(&x)); PetscCall(VecDestroy(&y)); PetscCall(VecDestroy(&z)); PetscCall(MatDestroy(&C)); PetscCall(PetscFinalize()); return 0; } /*TEST test: suffix: 11_A args: -mat_type seqaij -rectA filter: grep -v type test: args: -mat_type seqdense -rectA suffix: 12_A test: args: -mat_type seqaij -rectB suffix: 11_B filter: grep -v type test: args: -mat_type seqdense -rectB suffix: 12_B test: suffix: 21 args: -mat_type mpiaij filter: grep -v type test: suffix: 22 args: -mat_type mpidense test: suffix: 23 nsize: 3 args: -mat_type mpiaij filter: grep -v type test: suffix: 24 nsize: 3 args: -mat_type mpidense test: suffix: 2_aijcusparse_1 args: -mat_type mpiaijcusparse -vec_type cuda filter: grep -v type output_file: output/ex5_21.out requires: cuda test: nsize: 3 suffix: 2_aijcusparse_2 filter: grep -v type args: -mat_type mpiaijcusparse -vec_type cuda args: -sf_type {{basic neighbor}} output_file: output/ex5_23.out requires: cuda test: nsize: 3 suffix: 2_aijcusparse_3 filter: grep -v type args: -mat_type mpiaijcusparse -vec_type cuda args: -sf_type {{basic neighbor}} output_file: output/ex5_23.out requires: cuda defined(PETSC_HAVE_MPI_GPU_AWARE) test: suffix: 31 args: -mat_type mpiaij -test_diagonalscale filter: grep -v type test: suffix: 32 args: -mat_type mpibaij -test_diagonalscale test: suffix: 33 nsize: 3 args: -mat_type mpiaij -test_diagonalscale filter: grep -v type test: suffix: 34 nsize: 3 args: -mat_type mpibaij -test_diagonalscale test: suffix: 3_aijcusparse_1 args: -mat_type mpiaijcusparse -vec_type cuda -test_diagonalscale filter: grep -v type output_file: output/ex5_31.out requires: cuda test: suffix: 3_aijcusparse_2 nsize: 3 args: -mat_type mpiaijcusparse -vec_type cuda -test_diagonalscale filter: grep -v type output_file: output/ex5_33.out requires: cuda test: suffix: 3_kokkos nsize: 3 args: -mat_type mpiaijkokkos -vec_type kokkos -test_diagonalscale filter: grep -v type output_file: output/ex5_33.out requires: kokkos_kernels test: suffix: aijcusparse_1 args: -mat_type seqaijcusparse -vec_type cuda -rectA filter: grep -v type output_file: output/ex5_11_A.out requires: cuda test: suffix: aijcusparse_2 args: -mat_type seqaijcusparse -vec_type cuda -rectB filter: grep -v type output_file: output/ex5_11_B.out requires: cuda test: suffix: sell_1 args: -mat_type sell -mat_sell_slice_height 8 output_file: output/ex5_41.out test: suffix: sell_2 nsize: 3 args: -mat_type sell -mat_sell_slice_height 8 output_file: output/ex5_43.out test: suffix: sell_3 args: -mat_type sell -test_diagonalscale -mat_sell_slice_height 8 output_file: output/ex5_51.out test: suffix: sell_4 nsize: 3 args: -mat_type sell -test_diagonalscale -mat_sell_slice_height 8 output_file: output/ex5_53.out test: suffix: sell_5 nsize: 3 args: -mat_type sellcuda -vec_type cuda -test_diagonalscale -test_zeroentries output_file: output/ex5_55.out requires: cuda !complex test: suffix: sell_6 nsize: 3 args: -mat_type sellcuda -vec_type cuda -mat_sell_spmv_cuda_kernel {{1 2 3 4 5 6}} output_file: output/ex5_56.out requires: cuda !complex test: suffix: sell_7 args: -m 32 -mat_type sellcuda -vec_type cuda -mat_sell_spmv_cuda_kernel {{0 7 9}} -mat_sell_spmv_cuda_blocky {{2 4 8 16 32}} output_file: output/ex5_57.out requires: cuda !complex !single test: suffix: sell_8 nsize: 3 args: -mat_type sellhip -vec_type hip -test_diagonalscale -test_zeroentries filter: sed -e "s/hip/cuda/g" output_file: output/ex5_55.out requires: hip !complex test: suffix: sell_9 nsize: 3 args: -mat_type sellhip -vec_type hip -mat_sell_spmv_hip_kernel {{1 2 3 4 5 6}} filter: sed -e "s/hip/cuda/g" output_file: output/ex5_56.out requires: hip !complex test: suffix: sell_10 args: -m 32 -mat_type sellhip -vec_type hip -mat_sell_spmv_hip_kernel {{0 7 9}} -mat_sell_spmv_hip_blocky {{2 4 8 16 32}} filter: sed -e "s/hip/cuda/g" output_file: output/ex5_57.out requires: hip !complex !single TEST*/