1 static char help[] = "Driver for benchmarking SpMV."; 2 3 #include <petscmat.h> 4 #include "cJSON.h" 5 #include "mmloader.h" 6 7 char *read_file(const char *filename) 8 { 9 FILE *file = NULL; 10 long length = 0; 11 char *content = NULL; 12 size_t read_chars = 0; 13 14 /* open in read binary mode */ 15 file = fopen(filename, "rb"); 16 if (file) { 17 /* get the length */ 18 fseek(file, 0, SEEK_END); 19 length = ftell(file); 20 fseek(file, 0, SEEK_SET); 21 /* allocate content buffer */ 22 content = (char *)malloc((size_t)length + sizeof("")); 23 /* read the file into memory */ 24 read_chars = fread(content, sizeof(char), (size_t)length, file); 25 content[read_chars] = '\0'; 26 fclose(file); 27 } 28 return content; 29 } 30 31 void write_file(const char *filename, const char *content) 32 { 33 FILE *file = NULL; 34 file = fopen(filename, "w"); 35 if (file) { fputs(content, file); } 36 fclose(file); 37 } 38 39 int ParseJSON(const char *const inputjsonfile, char ***outputfilenames, char ***outputgroupnames, char ***outputmatnames, int *nmat) 40 { 41 char *content = read_file(inputjsonfile); 42 cJSON *matrix_json = NULL; 43 const cJSON *problem = NULL, *elem = NULL; 44 const cJSON *item = NULL; 45 char **filenames, **groupnames, **matnames; 46 int i, n; 47 if (!content) return 0; 48 matrix_json = cJSON_Parse(content); 49 if (!matrix_json) return 0; 50 n = cJSON_GetArraySize(matrix_json); 51 *nmat = n; 52 filenames = (char **)malloc(sizeof(char *) * n); 53 groupnames = (char **)malloc(sizeof(char *) * n); 54 matnames = (char **)malloc(sizeof(char *) * n); 55 for (i = 0; i < n; i++) { 56 elem = cJSON_GetArrayItem(matrix_json, i); 57 item = cJSON_GetObjectItemCaseSensitive(elem, "filename"); 58 filenames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1)); 59 strcpy(filenames[i], item->valuestring); 60 problem = cJSON_GetObjectItemCaseSensitive(elem, "problem"); 61 item = cJSON_GetObjectItemCaseSensitive(problem, "group"); 62 groupnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1); 63 strcpy(groupnames[i], item->valuestring); 64 item = cJSON_GetObjectItemCaseSensitive(problem, "name"); 65 matnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1); 66 strcpy(matnames[i], item->valuestring); 67 } 68 cJSON_Delete(matrix_json); 69 free(content); 70 *outputfilenames = filenames; 71 *outputgroupnames = groupnames; 72 *outputmatnames = matnames; 73 return 0; 74 } 75 76 int UpdateJSON(const char *const inputjsonfile, PetscReal *spmv_times, PetscReal starting_spmv_time, const char *const matformat, PetscBool use_gpu, PetscInt repetitions) 77 { 78 char *content = read_file(inputjsonfile); 79 cJSON *matrix_json = NULL; 80 cJSON *elem = NULL; 81 int i, n; 82 if (!content) return 0; 83 matrix_json = cJSON_Parse(content); 84 if (!matrix_json) return 0; 85 n = cJSON_GetArraySize(matrix_json); 86 for (i = 0; i < n; i++) { 87 cJSON *spmv = NULL; 88 cJSON *format = NULL; 89 elem = cJSON_GetArrayItem(matrix_json, i); 90 spmv = cJSON_GetObjectItem(elem, "spmv"); 91 if (spmv) { 92 format = cJSON_GetObjectItem(spmv, matformat); 93 if (format) { 94 cJSON_SetNumberValue(cJSON_GetObjectItem(format, "time"), (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions); 95 cJSON_SetIntValue(cJSON_GetObjectItem(format, "repetitions"), repetitions); 96 } else { 97 format = cJSON_CreateObject(); 98 cJSON_AddItemToObject(spmv, matformat, format); 99 cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions); 100 cJSON_AddNumberToObject(format, "repetitions", repetitions); 101 } 102 } else { 103 spmv = cJSON_CreateObject(); 104 cJSON_AddItemToObject(elem, "spmv", spmv); 105 format = cJSON_CreateObject(); 106 cJSON_AddItemToObject(spmv, matformat, format); 107 cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions); 108 cJSON_AddNumberToObject(format, "repetitions", repetitions); 109 } 110 } 111 free(content); 112 content = cJSON_Print(matrix_json); 113 write_file(inputjsonfile, content); 114 cJSON_Delete(matrix_json); 115 free(content); 116 return 0; 117 } 118 119 /* 120 For GPU formats, we keep two copies of the matrix on CPU and one copy on GPU. 121 The extra CPU copy allows us to destroy the GPU matrix and recreate it efficiently 122 in each repetition. As a result, each MatMult call is fresh, and we can capture 123 the first-time overhead (e.g. of CuSparse SpMV), and avoids the cache effect 124 during consecutive calls. 125 */ 126 PetscErrorCode TimedSpMV(Mat A, Vec b, PetscReal *time, const char *petscmatformat, PetscBool use_gpu, PetscInt repetitions) 127 { 128 Mat A2 = NULL; 129 PetscInt i; 130 Vec u; 131 PetscLogDouble vstart = 0, vend = 0; 132 PetscBool isaijcusparse, isaijhipsparse, isaijkokkos, issellcuda, issellhip; 133 134 PetscFunctionBeginUser; 135 PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse)); 136 PetscCall(PetscStrcmp(petscmatformat, MATAIJHIPSPARSE, &isaijhipsparse)); 137 PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos)); 138 PetscCall(PetscStrcmp(petscmatformat, MATSELLCUDA, &issellcuda)); 139 PetscCall(PetscStrcmp(petscmatformat, MATSELLHIP, &issellhip)); 140 if (isaijcusparse || issellcuda) PetscCall(VecSetType(b, VECCUDA)); 141 if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS)); 142 if (isaijhipsparse || issellhip) PetscCall(VecSetType(b, VECHIP)); 143 PetscCall(VecDuplicate(b, &u)); 144 if (time) *time = 0.0; 145 for (i = 0; i < repetitions; i++) { 146 if (use_gpu) { 147 PetscCall(MatDestroy(&A2)); 148 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2)); 149 if (issellcuda) { 150 PetscCall(MatConvert(A2, MATSELL, MAT_INPLACE_MATRIX, &A2)); 151 PetscCall(MatConvert(A2, MATSELLCUDA, MAT_INPLACE_MATRIX, &A2)); 152 } else if (issellhip) { 153 PetscCall(MatConvert(A2, MATSELL, MAT_INPLACE_MATRIX, &A2)); 154 PetscCall(MatConvert(A2, MATSELLHIP, MAT_INPLACE_MATRIX, &A2)); 155 } else { 156 PetscCall(MatConvert(A2, petscmatformat, MAT_INPLACE_MATRIX, &A2)); 157 } 158 } else A2 = A; 159 /* Timing MatMult */ 160 if (time) PetscCall(PetscTime(&vstart)); 161 162 PetscCall(MatMult(A2, b, u)); 163 164 if (time) { 165 PetscCall(PetscTime(&vend)); 166 *time += (PetscReal)(vend - vstart); 167 } 168 } 169 PetscCall(VecDestroy(&u)); 170 if (repetitions > 0 && use_gpu) PetscCall(MatDestroy(&A2)); 171 PetscFunctionReturn(PETSC_SUCCESS); 172 } 173 174 PetscErrorCode PetscLogSpMVTime(PetscReal *gputime, PetscReal *cputime, PetscReal *gpuflops, const char *petscmatformat) 175 { 176 PetscLogEvent event; 177 PetscEventPerfInfo eventInfo; 178 //PetscReal gpuflopRate; 179 180 // if (matformat) { 181 // PetscCall(PetscLogEventGetId("MatCUDACopyTo", &event)); 182 // } else { 183 // PetscCall(PetscLogEventGetId("MatCUSPARSCopyTo", &event)); 184 // } 185 // PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo)); 186 // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.4e ", eventInfo.time)); 187 188 PetscFunctionBeginUser; 189 PetscCall(PetscLogEventGetId("MatMult", &event)); 190 PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo)); 191 //gpuflopRate = eventInfo.GpuFlops/eventInfo.GpuTime; 192 // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.2f %.4e %.4e\n", gpuflopRate/1.e6, eventInfo.GpuTime, eventInfo.time)); 193 if (cputime) *cputime = eventInfo.time; 194 #if defined(PETSC_HAVE_DEVICE) 195 if (gputime) *gputime = eventInfo.GpuTime; 196 if (gpuflops) *gpuflops = eventInfo.GpuFlops / 1.e6; 197 #endif 198 PetscFunctionReturn(PETSC_SUCCESS); 199 } 200 201 PetscErrorCode MapToPetscMatType(const char *matformat, PetscBool use_gpu, char **petscmatformat) 202 { 203 PetscBool iscsr, issell, iscsrkokkos; 204 205 PetscFunctionBeginUser; 206 PetscCall(PetscStrcmp(matformat, "csr", &iscsr)); 207 if (iscsr) { 208 if (use_gpu) { 209 #if defined(PETSC_HAVE_CUDA) 210 PetscCall(PetscStrallocpy(MATAIJCUSPARSE, petscmatformat)); 211 #endif 212 #if defined(PETSC_HAVE_HIP) 213 PetscCall(PetscStrallocpy(MATAIJHIPSPARSE, petscmatformat)); 214 #endif 215 } else PetscCall(PetscStrallocpy(MATAIJ, petscmatformat)); 216 } else { 217 PetscCall(PetscStrcmp(matformat, "sell", &issell)); 218 if (issell) { 219 if (use_gpu) { 220 #if defined(PETSC_HAVE_CUDA) 221 PetscCall(PetscStrallocpy(MATSELLCUDA, petscmatformat)); 222 #endif 223 #if defined(PETSC_HAVE_HIP) 224 PetscCall(PetscStrallocpy(MATSELLHIP, petscmatformat)); 225 #endif 226 } else PetscCall(PetscStrallocpy(MATSELL, petscmatformat)); 227 } else { 228 PetscCall(PetscStrcmp(matformat, "csrkokkos", &iscsrkokkos)); 229 if (iscsrkokkos) PetscCall(PetscStrallocpy(MATAIJKOKKOS, petscmatformat)); 230 } 231 } 232 PetscFunctionReturn(PETSC_SUCCESS); 233 } 234 235 int main(int argc, char **args) 236 { 237 PetscInt nmat = 1, nformats = 5, i, j, repetitions = 1; 238 Mat A; 239 Vec b; 240 char jfilename[PETSC_MAX_PATH_LEN]; 241 char filename[PETSC_MAX_PATH_LEN], bfilename[PETSC_MAX_PATH_LEN]; 242 char groupname[PETSC_MAX_PATH_LEN], matname[PETSC_MAX_PATH_LEN]; 243 char *matformats[5]; 244 char **filenames = NULL, **groupnames = NULL, **matnames = NULL; 245 char ordering[256] = MATORDERINGRCM; 246 PetscBool bflg, flg1, flg2, flg3, use_gpu = PETSC_FALSE, permute = PETSC_FALSE; 247 IS rowperm = NULL, colperm = NULL; 248 PetscViewer fd; 249 PetscReal starting_spmv_time = 0, *spmv_times; 250 251 PetscCall(PetscOptionsInsertString(NULL, "-log_view_gpu_time -log_view :/dev/null")); 252 PetscCall(PetscInitialize(&argc, &args, (char *)0, help)); 253 PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-formats", matformats, &nformats, &flg1)); 254 if (!flg1) { 255 nformats = 1; 256 PetscCall(PetscStrallocpy("csr", &matformats[0])); 257 } 258 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_gpu", &use_gpu, NULL)); 259 PetscCall(PetscOptionsGetInt(NULL, NULL, "-repetitions", &repetitions, NULL)); 260 /* Read matrix and RHS */ 261 PetscCall(PetscOptionsGetString(NULL, NULL, "-groupname", groupname, PETSC_MAX_PATH_LEN, NULL)); 262 PetscCall(PetscOptionsGetString(NULL, NULL, "-matname", matname, PETSC_MAX_PATH_LEN, NULL)); 263 PetscCall(PetscOptionsGetString(NULL, NULL, "-ABIN", filename, PETSC_MAX_PATH_LEN, &flg1)); 264 PetscCall(PetscOptionsGetString(NULL, NULL, "-AMTX", filename, PETSC_MAX_PATH_LEN, &flg2)); 265 PetscCall(PetscOptionsGetString(NULL, NULL, "-AJSON", jfilename, PETSC_MAX_PATH_LEN, &flg3)); 266 PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Extra options", ""); 267 PetscCall(PetscOptionsFList("-permute", "Permute matrix and vector to solving in new ordering", "", MatOrderingList, ordering, ordering, sizeof(ordering), &permute)); 268 PetscOptionsEnd(); 269 #if !defined(PETSC_HAVE_DEVICE) 270 PetscCheck(!use_gpu, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "To use the option -use_gpu 1, PETSc must be configured with GPU support"); 271 #endif 272 PetscCheck(flg1 || flg2 || flg3, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate an input file with the -ABIN or -AMTX or -AJSON depending on the file format"); 273 if (flg3) { 274 ParseJSON(jfilename, &filenames, &groupnames, &matnames, &nmat); 275 PetscCall(PetscCalloc1(nmat, &spmv_times)); 276 } else if (flg2) { 277 PetscCall(MatCreateFromMTX(&A, filename, PETSC_TRUE)); 278 } else if (flg1) { 279 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_READ, &fd)); 280 PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 281 PetscCall(MatSetType(A, MATAIJ)); 282 PetscCall(MatSetFromOptions(A)); 283 PetscCall(MatLoad(A, fd)); 284 PetscCall(PetscViewerDestroy(&fd)); 285 } 286 if (permute) { 287 Mat Aperm; 288 PetscCall(MatGetOrdering(A, ordering, &rowperm, &colperm)); 289 PetscCall(MatPermute(A, rowperm, colperm, &Aperm)); 290 PetscCall(MatDestroy(&A)); 291 A = Aperm; /* Replace original operator with permuted version */ 292 } 293 /* Let the vec object trigger the first CUDA call, which takes a relatively long time to init CUDA */ 294 PetscCall(PetscOptionsGetString(NULL, NULL, "-b", bfilename, PETSC_MAX_PATH_LEN, &bflg)); 295 if (bflg) { 296 PetscViewer fb; 297 PetscCall(VecCreate(PETSC_COMM_WORLD, &b)); 298 PetscCall(VecSetFromOptions(b)); 299 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, bfilename, FILE_MODE_READ, &fb)); 300 PetscCall(VecLoad(b, fb)); 301 PetscCall(PetscViewerDestroy(&fb)); 302 } 303 304 for (j = 0; j < nformats; j++) { 305 char *petscmatformat = NULL; 306 PetscCall(MapToPetscMatType(matformats[j], use_gpu, &petscmatformat)); 307 PetscCheck(petscmatformat, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Invalid mat format %s, supported options include csr and sell.", matformats[j]); 308 if (flg3) { // mat names specified in a JSON file 309 for (i = 0; i < nmat; i++) { 310 PetscCall(MatCreateFromMTX(&A, filenames[i], PETSC_TRUE)); 311 if (!bflg) { 312 PetscCall(MatCreateVecs(A, &b, NULL)); 313 PetscCall(VecSet(b, 1.0)); 314 } 315 PetscCall(TimedSpMV(A, b, NULL, petscmatformat, use_gpu, repetitions)); 316 if (use_gpu) PetscCall(PetscLogSpMVTime(&spmv_times[i], NULL, NULL, petscmatformat)); 317 else PetscCall(PetscLogSpMVTime(NULL, &spmv_times[i], NULL, petscmatformat)); 318 PetscCall(MatDestroy(&A)); 319 if (!bflg) PetscCall(VecDestroy(&b)); 320 } 321 UpdateJSON(jfilename, spmv_times, starting_spmv_time, matformats[j], use_gpu, repetitions); 322 starting_spmv_time = spmv_times[nmat - 1]; 323 } else { 324 PetscReal spmv_time; 325 if (!bflg) { 326 PetscCall(MatCreateVecs(A, &b, NULL)); 327 PetscCall(VecSet(b, 1.0)); 328 } 329 PetscCall(TimedSpMV(A, b, &spmv_time, petscmatformat, use_gpu, repetitions)); 330 if (!bflg) PetscCall(VecDestroy(&b)); 331 } 332 PetscCall(PetscFree(petscmatformat)); 333 } 334 if (flg3) { 335 for (i = 0; i < nmat; i++) { 336 free(filenames[i]); 337 free(groupnames[i]); 338 free(matnames[i]); 339 } 340 free(filenames); 341 free(groupnames); 342 free(matnames); 343 PetscCall(PetscFree(spmv_times)); 344 } 345 for (j = 0; j < nformats; j++) PetscCall(PetscFree(matformats[j])); 346 if (flg1 || flg2) PetscCall(MatDestroy(&A)); 347 if (bflg) PetscCall(VecDestroy(&b)); 348 PetscCall(ISDestroy(&rowperm)); 349 PetscCall(ISDestroy(&colperm)); 350 PetscCall(PetscFinalize()); 351 return 0; 352 } 353 /*TEST 354 355 build: 356 requires: !complex double !windows_compilers !defined(PETSC_USE_64BIT_INDICES) 357 depends: mmloader.c mmio.c cJSON.c 358 359 test: 360 suffix: 1 361 args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx 362 363 test: 364 suffix: 2 365 args:-AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu 366 output_file: output/bench_spmv_1.out 367 requires: cuda 368 369 test: 370 suffix: 3 371 args:-AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu 372 output_file: output/bench_spmv_1.out 373 requires: hip 374 375 TEST*/ 376