1 static char help[] = "Benchmarking MatProduct with AIJ and its subclass matrix types\n"; 2 3 /* 4 Usage: 5 mpirun -n <np> ./ex6k 6 -A <filename> : input PETSc binary file for matrix A; one can convert a file from MatrixMarket using mat/tests/ex72.c 7 -P <filename> : input PETSc binary file for matrix P; optional, if not given, P = A 8 -mat_type <str> : aij or its subclass. Default is aij. 9 -prod_type <str> : AP, AtP, APt, PtAP or PAPt. Default is AP. 10 -n <num> : run MatProductNumeric() this many times and report average time. Default is 100. 11 12 Notes: 13 It uses CPU-timer to measure the time. 14 15 Examples: 16 On OLCF Summit (with GPU-aware MPI) 17 # 6 MPI ranks: 18 # 6 resource sets (-n 6), 1 MPI rank per RS (-a 1), 7 CPU cores per RS (-c 7), and 1 GPU per RS (-g 1), 6 RSs per node (-r 6) 19 jsrun --smpiargs "-gpu" -n 6 -a 1 -c 7 -g 1 -r 6 ./ex6k -A cage12.aij -mat_type aijcusparse 20 21 # 1 MPI rank 22 jsrun --smpiargs "-gpu" -n 1 -a 1 -c 7 -g 1 -r 1 ./ex6k -A cage12.aij -mat_type aijcusparse 23 24 On OLCF Crusher: 25 # 1 MPI rank 26 # run with 1 node (-N1), 1 mpi rank (-n1), 2 hardware threads per rank (-c2) 27 srun -N1 -n1 -c2 --gpus-per-node=8 --gpu-bind=closest ./ex6k -A HV15R.aij -mat_type aijkokkos 28 29 # 8 MPI ranks 30 srun -N1 -n8 -c2 --gpus-per-node=8 --gpu-bind=closest ./ex6k -A HV15R.aij -mat_type aijkokkos 31 */ 32 #include <petscmat.h> 33 #include <petscdevice.h> 34 35 #if defined(PETSC_HAVE_CUDA) 36 #include <petscdevice_cuda.h> 37 #define SyncDevice() PetscCallCUDA(cudaDeviceSynchronize()) 38 #elif defined(PETSC_HAVE_HIP) 39 #include <petscdevice_hip.h> 40 #define SyncDevice() PetscCallHIP(hipDeviceSynchronize()) 41 #elif defined(PETSC_HAVE_KOKKOS) 42 #include <Kokkos_Core.hpp> 43 #define SyncDevice() Kokkos::fence() 44 #else 45 #define SyncDevice() 46 #endif 47 48 int main(int argc, char **args) 49 { 50 Mat A, P, C; 51 Mat A2, P2, C2; /* Shadow matrices (of MATAIJ) of A,P,C for initialization and validation */ 52 char matTypeStr[64], prodTypeStr[32]; 53 char fileA[PETSC_MAX_PATH_LEN], fileP[PETSC_MAX_PATH_LEN]; 54 PetscViewer fdA, fdP; 55 PetscBool flg, flgA, flgP, equal = PETSC_FALSE; 56 PetscLogStage stage; 57 PetscInt i, n = 100, nskip = 2, M, N; 58 MatInfo info; 59 PetscLogDouble tstart = 0, tend = 0, avgTime; 60 PetscMPIInt size; 61 MatProductType prodType; 62 PetscBool isAP, isAtP, isAPt, isPtAP, isPAPt; 63 64 PetscFunctionBeginUser; 65 PetscCall(PetscInitialize(&argc, &args, nullptr, help)); 66 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 67 68 /* Read options -n */ 69 PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL)); 70 71 /* Load the matrix from a binary file */ 72 PetscCall(PetscOptionsGetString(NULL, NULL, "-A", fileA, PETSC_MAX_PATH_LEN, &flgA)); 73 PetscCall(PetscOptionsGetString(NULL, NULL, "-P", fileP, PETSC_MAX_PATH_LEN, &flgP)); 74 PetscCheck(flgA, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must give a PETSc matrix binary file with the -A option"); 75 76 PetscCall(PetscOptionsGetString(NULL, NULL, "-mat_type", matTypeStr, sizeof(matTypeStr), &flg)); 77 if (!flg) PetscCall(PetscStrncpy(matTypeStr, MATAIJ, sizeof(matTypeStr))); /* Inject the default if not provided */ 78 79 PetscCall(PetscOptionsGetString(NULL, NULL, "-prod_type", prodTypeStr, sizeof(prodTypeStr), &flg)); 80 if (!flg) PetscCall(PetscStrncpy(prodTypeStr, "AP", sizeof(prodTypeStr))); /* Inject the default if not provided */ 81 82 PetscCall(PetscStrcmp(prodTypeStr, "AP", &isAP)); 83 PetscCall(PetscStrcmp(prodTypeStr, "AtP", &isAtP)); 84 PetscCall(PetscStrcmp(prodTypeStr, "APt", &isAPt)); 85 PetscCall(PetscStrcmp(prodTypeStr, "PtAP", &isPtAP)); 86 PetscCall(PetscStrcmp(prodTypeStr, "PAPt", &isPAPt)); 87 88 if (isAP) prodType = MATPRODUCT_AB; 89 else if (isAtP) prodType = MATPRODUCT_AtB; 90 else if (isAPt) prodType = MATPRODUCT_ABt; 91 else if (isPtAP) prodType = MATPRODUCT_PtAP; 92 else if (isPAPt) prodType = MATPRODUCT_RARt; 93 else SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER, "Unsupported product type %s", prodTypeStr); 94 95 /* Read the matrix file to A2 */ 96 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, fileA, FILE_MODE_READ, &fdA)); 97 PetscCall(MatCreate(PETSC_COMM_WORLD, &A2)); 98 PetscCall(MatSetType(A2, MATAIJ)); 99 PetscCall(MatLoad(A2, fdA)); 100 PetscCall(PetscViewerDestroy(&fdA)); 101 102 PetscCall(MatGetSize(A2, &M, &N)); 103 PetscCall(MatGetInfo(A2, MAT_GLOBAL_SUM, &info)); 104 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Input matrix A: %s, %" PetscInt_FMT " x %" PetscInt_FMT ", %lld nonzeros, %.1f per row\n", fileA, M, N, (long long)info.nz_used, (double)info.nz_used / (double)M)); 105 106 /* Copy A2 to A and convert A to the specified type */ 107 PetscCall(MatDuplicate(A2, MAT_COPY_VALUES, &A)); 108 PetscCall(MatConvert(A, matTypeStr, MAT_INPLACE_MATRIX, &A)); 109 110 /* Init P, P2 similarly */ 111 if (flgP) { /* If user provided P */ 112 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, fileP, FILE_MODE_READ, &fdP)); 113 PetscCall(MatCreate(PETSC_COMM_WORLD, &P2)); 114 PetscCall(MatSetType(P2, MATAIJ)); 115 PetscCall(MatLoad(P2, fdP)); 116 PetscCall(PetscViewerDestroy(&fdP)); 117 118 PetscCall(MatDuplicate(P2, MAT_COPY_VALUES, &P)); 119 PetscCall(MatConvert(P, matTypeStr, MAT_INPLACE_MATRIX, &P)); 120 121 PetscCall(MatGetSize(P2, &M, &N)); 122 PetscCall(MatGetInfo(P2, MAT_GLOBAL_SUM, &info)); 123 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Input matrix P: %s, %" PetscInt_FMT " x %" PetscInt_FMT ", %lld nonzeros, %.1f per row\n", fileP, M, N, (long long)info.nz_used, (double)info.nz_used / (double)M)); 124 } else { /* otherwise just let P = A */ 125 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Input matrix P = A\n")); 126 P2 = A2; 127 P = A; 128 } 129 130 /* Compute the reference C2 */ 131 PetscCall(MatProductCreate(A2, P2, NULL, &C2)); 132 PetscCall(MatProductSetType(C2, prodType)); 133 PetscCall(MatProductSetFill(C2, PETSC_DEFAULT)); 134 PetscCall(MatProductSetFromOptions(C2)); 135 PetscCall(MatProductSymbolic(C2)); 136 PetscCall(MatProductNumeric(C2)); 137 PetscCall(MatGetSize(C2, &M, &N)); 138 PetscCall(MatGetInfo(C2, MAT_GLOBAL_SUM, &info)); 139 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Mat product C = %s: %" PetscInt_FMT " x %" PetscInt_FMT ", %lld nonzeros, %.1f per row\n", prodTypeStr, M, N, (long long)info.nz_used, (double)info.nz_used / (double)M)); 140 141 /* Compute C */ 142 PetscCall(MatProductCreate(A, P, NULL, &C)); 143 PetscCall(MatProductSetType(C, prodType)); 144 PetscCall(MatProductSetAlgorithm(C, MATPRODUCTALGORITHMBACKEND)); 145 PetscCall(MatProductSetFill(C, PETSC_DEFAULT)); 146 PetscCall(MatProductSetFromOptions(C)); 147 148 /* Measure MatProductSymbolic */ 149 PetscCall(PetscLogStageRegister("MatProductSymbolic", &stage)); 150 PetscCall(PetscLogStagePush(stage)); 151 SyncDevice(); 152 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 153 PetscCall(PetscTime(&tstart)); 154 PetscCall(MatProductSymbolic(C)); 155 SyncDevice(); 156 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 157 PetscCall(PetscTime(&tend)); 158 avgTime = (tend - tstart) * 1e6; /* microseconds */ 159 PetscCall(PetscLogStagePop()); 160 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\nMatProductSymbolic() time (us) with %d MPI ranks = %8.2f\n", size, avgTime)); 161 162 /* Measure MatProductNumeric */ 163 PetscCall(PetscLogStageRegister("MatProductNumeric", &stage)); 164 for (i = 0; i < n + nskip; i++) { 165 if (i == nskip) { 166 SyncDevice(); 167 PetscCall(PetscLogStagePush(stage)); 168 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 169 PetscCall(PetscTime(&tstart)); 170 } 171 PetscCall(MatProductReplaceMats(A, P, NULL, C)); 172 PetscCall(MatProductNumeric(C)); 173 } 174 SyncDevice(); 175 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 176 PetscCall(PetscTime(&tend)); 177 avgTime = (tend - tstart) * 1e6 / n; /* microseconds */ 178 PetscCall(PetscLogStagePop()); 179 180 PetscCall(MatMultEqual(C, C2, 8, &equal)); /* Not MatEqual() since C and C2 are not necessarily bitwise equal */ 181 182 PetscCheck(equal, PETSC_COMM_WORLD, PETSC_ERR_PLIB, "Matrix production error"); 183 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "MatProductNumeric() average time (us) with %d MPI ranks = %8.2f\n", size, avgTime)); 184 185 PetscCall(MatDestroy(&A)); 186 if (flgP) PetscCall(MatDestroy(&P)); 187 PetscCall(MatDestroy(&C)); 188 189 PetscCall(MatDestroy(&A2)); 190 if (flgP) PetscCall(MatDestroy(&P2)); 191 PetscCall(MatDestroy(&C2)); 192 193 PetscCall(PetscFinalize()); 194 return 0; 195 } 196 197 /*TEST 198 199 testset: 200 args: -n 2 -A ${DATAFILESPATH}/matrices/small 201 nsize: 1 202 filter: grep "DOES_NOT_EXIST" 203 output_file: output/empty.out 204 requires: datafilespath !complex double !defined(PETSC_USE_64BIT_INDICES) kokkos_kernels 205 206 test: 207 suffix: 1 208 requires: cuda 209 args: -mat_type aijcusparse 210 211 test: 212 suffix: 2 213 args: -mat_type aijkokkos 214 215 test: 216 suffix: 3 217 requires: hip 218 args: -mat_type aijhipsparse 219 220 TEST*/ 221