1 static char help[] = "Driver for benchmarking SpMV."; 2 3 #include <petscmat.h> 4 #include "cJSON.h" 5 #include "mmloader.h" 6 7 char *read_file(const char *filename) 8 { 9 FILE *file = NULL; 10 long length = 0; 11 char *content = NULL; 12 size_t read_chars = 0; 13 14 /* open in read binary mode */ 15 file = fopen(filename, "rb"); 16 if (file) { 17 /* get the length */ 18 fseek(file, 0, SEEK_END); 19 length = ftell(file); 20 fseek(file, 0, SEEK_SET); 21 /* allocate content buffer */ 22 content = (char *)malloc((size_t)length + sizeof("")); 23 /* read the file into memory */ 24 read_chars = fread(content, sizeof(char), (size_t)length, file); 25 content[read_chars] = '\0'; 26 fclose(file); 27 } 28 return content; 29 } 30 31 void write_file(const char *filename, const char *content) 32 { 33 FILE *file = NULL; 34 file = fopen(filename, "w"); 35 if (file) fputs(content, file); 36 fclose(file); 37 } 38 39 int ParseJSON(const char *const inputjsonfile, char ***outputfilenames, char ***outputgroupnames, char ***outputmatnames, int *nmat) 40 { 41 char *content = read_file(inputjsonfile); 42 cJSON *matrix_json = NULL; 43 const cJSON *problem = NULL, *elem = NULL; 44 const cJSON *item = NULL; 45 char **filenames, **groupnames, **matnames; 46 int i, n; 47 if (!content) return 0; 48 matrix_json = cJSON_Parse(content); 49 if (!matrix_json) return 0; 50 n = cJSON_GetArraySize(matrix_json); 51 *nmat = n; 52 filenames = (char **)malloc(sizeof(char *) * n); 53 groupnames = (char **)malloc(sizeof(char *) * n); 54 matnames = (char **)malloc(sizeof(char *) * n); 55 for (i = 0; i < n; i++) { 56 elem = cJSON_GetArrayItem(matrix_json, i); 57 item = cJSON_GetObjectItemCaseSensitive(elem, "filename"); 58 filenames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1)); 59 strcpy(filenames[i], item->valuestring); 60 problem = cJSON_GetObjectItemCaseSensitive(elem, "problem"); 61 item = cJSON_GetObjectItemCaseSensitive(problem, "group"); 62 groupnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1); 63 strcpy(groupnames[i], item->valuestring); 64 item = cJSON_GetObjectItemCaseSensitive(problem, "name"); 65 matnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1); 66 strcpy(matnames[i], item->valuestring); 67 } 68 cJSON_Delete(matrix_json); 69 free(content); 70 *outputfilenames = filenames; 71 *outputgroupnames = groupnames; 72 *outputmatnames = matnames; 73 return 0; 74 } 75 76 int UpdateJSON(const char *const inputjsonfile, PetscReal *spmv_times, PetscReal starting_spmv_time, const char *const matformat, PetscBool use_gpu, PetscInt repetitions) 77 { 78 char *content = read_file(inputjsonfile); 79 cJSON *matrix_json = NULL; 80 cJSON *elem = NULL; 81 int i, n; 82 if (!content) return 0; 83 matrix_json = cJSON_Parse(content); 84 if (!matrix_json) return 0; 85 n = cJSON_GetArraySize(matrix_json); 86 for (i = 0; i < n; i++) { 87 cJSON *spmv = NULL; 88 cJSON *format = NULL; 89 elem = cJSON_GetArrayItem(matrix_json, i); 90 spmv = cJSON_GetObjectItem(elem, "spmv"); 91 if (spmv) { 92 format = cJSON_GetObjectItem(spmv, matformat); 93 if (format) { 94 cJSON_SetNumberValue(cJSON_GetObjectItem(format, "time"), (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions); 95 cJSON_SetIntValue(cJSON_GetObjectItem(format, "repetitions"), repetitions); 96 } else { 97 format = cJSON_CreateObject(); 98 cJSON_AddItemToObject(spmv, matformat, format); 99 cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions); 100 cJSON_AddNumberToObject(format, "repetitions", repetitions); 101 } 102 } else { 103 spmv = cJSON_CreateObject(); 104 cJSON_AddItemToObject(elem, "spmv", spmv); 105 format = cJSON_CreateObject(); 106 cJSON_AddItemToObject(spmv, matformat, format); 107 cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions); 108 cJSON_AddNumberToObject(format, "repetitions", repetitions); 109 } 110 } 111 free(content); 112 content = cJSON_Print(matrix_json); 113 write_file(inputjsonfile, content); 114 cJSON_Delete(matrix_json); 115 free(content); 116 return 0; 117 } 118 119 /* 120 For GPU formats, we keep two copies of the matrix on CPU and one copy on GPU. 121 The extra CPU copy allows us to destroy the GPU matrix and recreate it efficiently 122 in each repetition. As a result, each MatMult call is fresh, and we can capture 123 the first-time overhead (e.g. of CuSparse SpMV), and avoids the cache effect 124 during consecutive calls. 125 */ 126 PetscErrorCode TimedSpMV(Mat A, Vec b, PetscReal *time, const char *petscmatformat, PetscBool use_gpu, PetscInt repetitions) 127 { 128 Mat A2 = NULL; 129 PetscInt i; 130 Vec u; 131 PetscLogDouble vstart = 0, vend = 0; 132 PetscBool isaijcusparse, isaijhipsparse, isaijkokkos, issellcuda, issellhip; 133 134 PetscFunctionBeginUser; 135 PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse)); 136 PetscCall(PetscStrcmp(petscmatformat, MATAIJHIPSPARSE, &isaijhipsparse)); 137 PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos)); 138 PetscCall(PetscStrcmp(petscmatformat, MATSELLCUDA, &issellcuda)); 139 PetscCall(PetscStrcmp(petscmatformat, MATSELLHIP, &issellhip)); 140 if (isaijcusparse || issellcuda) PetscCall(VecSetType(b, VECCUDA)); 141 if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS)); 142 if (isaijhipsparse || issellhip) PetscCall(VecSetType(b, VECHIP)); 143 PetscCall(VecDuplicate(b, &u)); 144 if (time) *time = 0.0; 145 for (i = 0; i < repetitions; i++) { 146 if (use_gpu) { 147 PetscCall(MatDestroy(&A2)); 148 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2)); 149 PetscCall(MatSetType(A2, petscmatformat)); 150 PetscCall(MatSetFromOptions(A2)); // This allows to change parameters such as slice height in SpMV kernels for SELL 151 } else A2 = A; 152 /* Timing MatMult */ 153 if (time) PetscCall(PetscTime(&vstart)); 154 155 PetscCall(MatMult(A2, b, u)); 156 157 if (time) { 158 PetscCall(PetscTime(&vend)); 159 *time += (PetscReal)(vend - vstart); 160 } 161 } 162 PetscCall(VecDestroy(&u)); 163 if (repetitions > 0 && use_gpu) PetscCall(MatDestroy(&A2)); 164 PetscFunctionReturn(PETSC_SUCCESS); 165 } 166 167 PetscErrorCode WarmUpDevice(Mat A, Vec b, const char *petscmatformat) 168 { 169 Mat A2 = NULL; 170 PetscLogEvent event; 171 Vec u; 172 PetscBool isaijcusparse, isaijhipsparse, isaijkokkos, issellcuda, issellhip; 173 174 PetscFunctionBeginUser; 175 PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse)); 176 PetscCall(PetscStrcmp(petscmatformat, MATAIJHIPSPARSE, &isaijhipsparse)); 177 PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos)); 178 PetscCall(PetscStrcmp(petscmatformat, MATSELLCUDA, &issellcuda)); 179 PetscCall(PetscStrcmp(petscmatformat, MATSELLHIP, &issellhip)); 180 if (!isaijcusparse && !isaijkokkos && !isaijhipsparse && !issellcuda && !issellhip) PetscFunctionReturn(PETSC_SUCCESS); 181 if (isaijcusparse || issellcuda) PetscCall(VecSetType(b, VECCUDA)); 182 if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS)); 183 if (isaijhipsparse || issellhip) PetscCall(VecSetType(b, VECHIP)); 184 PetscCall(VecDuplicate(b, &u)); 185 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2)); 186 PetscCall(MatSetType(A2, petscmatformat)); 187 PetscCall(PetscLogEventGetId("MatMult", &event)); 188 PetscCall(PetscLogEventDeactivatePush(event)); 189 PetscCall(MatMult(A2, b, u)); 190 PetscCall(PetscLogEventDeactivatePop(event)); 191 PetscCall(VecDestroy(&u)); 192 PetscCall(MatDestroy(&A2)); 193 PetscFunctionReturn(PETSC_SUCCESS); 194 } 195 196 PetscErrorCode PetscLogSpMVTime(PetscReal *gputime, PetscReal *cputime, PetscReal *gpuflops, const char *petscmatformat) 197 { 198 PetscLogEvent event; 199 PetscEventPerfInfo eventInfo; 200 // PetscReal gpuflopRate; 201 202 // if (matformat) { 203 // PetscCall(PetscLogEventGetId("MatCUDACopyTo", &event)); 204 // } else { 205 // PetscCall(PetscLogEventGetId("MatCUSPARSCopyTo", &event)); 206 // } 207 // PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo)); 208 // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.4e ", eventInfo.time)); 209 210 PetscFunctionBeginUser; 211 PetscCall(PetscLogEventGetId("MatMult", &event)); 212 PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo)); 213 // gpuflopRate = eventInfo.GpuFlops/eventInfo.GpuTime; 214 // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.2f %.4e %.4e\n", gpuflopRate/1.e6, eventInfo.GpuTime, eventInfo.time)); 215 if (cputime) *cputime = eventInfo.time; 216 #if defined(PETSC_HAVE_DEVICE) 217 if (gputime) *gputime = eventInfo.GpuTime; 218 if (gpuflops) *gpuflops = eventInfo.GpuFlops / 1.e6; 219 #endif 220 PetscFunctionReturn(PETSC_SUCCESS); 221 } 222 223 PetscErrorCode MapToPetscMatType(const char *matformat, PetscBool use_gpu, char **petscmatformat) 224 { 225 PetscBool iscsr, issell, iscsrkokkos; 226 227 PetscFunctionBeginUser; 228 PetscCall(PetscStrcmp(matformat, "csr", &iscsr)); 229 if (iscsr) { 230 if (use_gpu) { 231 #if defined(PETSC_HAVE_CUDA) 232 PetscCall(PetscStrallocpy(MATAIJCUSPARSE, petscmatformat)); 233 #endif 234 #if defined(PETSC_HAVE_HIP) 235 PetscCall(PetscStrallocpy(MATAIJHIPSPARSE, petscmatformat)); 236 #endif 237 } else PetscCall(PetscStrallocpy(MATAIJ, petscmatformat)); 238 } else { 239 PetscCall(PetscStrcmp(matformat, "sell", &issell)); 240 if (issell) { 241 if (use_gpu) { 242 #if defined(PETSC_HAVE_CUDA) 243 PetscCall(PetscStrallocpy(MATSELLCUDA, petscmatformat)); 244 #endif 245 #if defined(PETSC_HAVE_HIP) 246 PetscCall(PetscStrallocpy(MATSELLHIP, petscmatformat)); 247 #endif 248 } else PetscCall(PetscStrallocpy(MATSELL, petscmatformat)); 249 } else { 250 PetscCall(PetscStrcmp(matformat, "csrkokkos", &iscsrkokkos)); 251 if (iscsrkokkos) PetscCall(PetscStrallocpy(MATAIJKOKKOS, petscmatformat)); 252 } 253 } 254 PetscFunctionReturn(PETSC_SUCCESS); 255 } 256 257 int main(int argc, char **args) 258 { 259 PetscInt nmat = 1, nformats = 5, i, j, repetitions = 1; 260 Mat A; 261 Vec b; 262 char jfilename[PETSC_MAX_PATH_LEN]; 263 char filename[PETSC_MAX_PATH_LEN], bfilename[PETSC_MAX_PATH_LEN]; 264 char groupname[PETSC_MAX_PATH_LEN], matname[PETSC_MAX_PATH_LEN]; 265 char *matformats[5]; 266 char **filenames = NULL, **groupnames = NULL, **matnames = NULL; 267 char ordering[256] = MATORDERINGRCM; 268 PetscBool bflg, flg1, flg2, flg3, use_gpu = PETSC_FALSE, permute = PETSC_FALSE; 269 IS rowperm = NULL, colperm = NULL; 270 PetscViewer fd; 271 PetscReal starting_spmv_time = 0, *spmv_times; 272 273 PetscCall(PetscOptionsInsertString(NULL, "-log_view :/dev/null")); 274 if (PetscDefined(HAVE_DEVICE)) PetscCall(PetscOptionsInsertString(NULL, "-log_view_gpu_time")); 275 PetscCall(PetscInitialize(&argc, &args, NULL, help)); 276 PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-formats", matformats, &nformats, &flg1)); 277 if (!flg1) { 278 nformats = 1; 279 PetscCall(PetscStrallocpy("csr", &matformats[0])); 280 } 281 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_gpu", &use_gpu, NULL)); 282 PetscCall(PetscOptionsGetInt(NULL, NULL, "-repetitions", &repetitions, NULL)); 283 /* Read matrix and RHS */ 284 PetscCall(PetscOptionsGetString(NULL, NULL, "-groupname", groupname, PETSC_MAX_PATH_LEN, NULL)); 285 PetscCall(PetscOptionsGetString(NULL, NULL, "-matname", matname, PETSC_MAX_PATH_LEN, NULL)); 286 PetscCall(PetscOptionsGetString(NULL, NULL, "-ABIN", filename, PETSC_MAX_PATH_LEN, &flg1)); 287 PetscCall(PetscOptionsGetString(NULL, NULL, "-AMTX", filename, PETSC_MAX_PATH_LEN, &flg2)); 288 PetscCall(PetscOptionsGetString(NULL, NULL, "-AJSON", jfilename, PETSC_MAX_PATH_LEN, &flg3)); 289 PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Extra options", ""); 290 PetscCall(PetscOptionsFList("-permute", "Permute matrix and vector to solving in new ordering", "", MatOrderingList, ordering, ordering, sizeof(ordering), &permute)); 291 PetscOptionsEnd(); 292 #if !defined(PETSC_HAVE_DEVICE) 293 PetscCheck(!use_gpu, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "To use the option -use_gpu 1, PETSc must be configured with GPU support"); 294 #endif 295 PetscCheck(flg1 || flg2 || flg3, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate an input file with the -ABIN or -AMTX or -AJSON depending on the file format"); 296 if (flg3) { 297 ParseJSON(jfilename, &filenames, &groupnames, &matnames, &nmat); 298 PetscCall(PetscCalloc1(nmat, &spmv_times)); 299 } else if (flg2) { 300 PetscCall(MatCreateFromMTX(&A, filename, PETSC_TRUE)); 301 } else if (flg1) { 302 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_READ, &fd)); 303 PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 304 PetscCall(MatSetType(A, MATAIJ)); 305 PetscCall(MatSetFromOptions(A)); 306 PetscCall(MatLoad(A, fd)); 307 PetscCall(PetscViewerDestroy(&fd)); 308 } 309 if (permute) { 310 Mat Aperm; 311 PetscCall(MatGetOrdering(A, ordering, &rowperm, &colperm)); 312 PetscCall(MatPermute(A, rowperm, colperm, &Aperm)); 313 PetscCall(MatDestroy(&A)); 314 A = Aperm; /* Replace original operator with permuted version */ 315 } 316 /* Let the vec object trigger the first CUDA call, which takes a relatively long time to init CUDA */ 317 PetscCall(PetscOptionsGetString(NULL, NULL, "-b", bfilename, PETSC_MAX_PATH_LEN, &bflg)); 318 if (bflg) { 319 PetscViewer fb; 320 PetscCall(VecCreate(PETSC_COMM_WORLD, &b)); 321 PetscCall(VecSetFromOptions(b)); 322 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, bfilename, FILE_MODE_READ, &fb)); 323 PetscCall(VecLoad(b, fb)); 324 PetscCall(PetscViewerDestroy(&fb)); 325 } 326 327 for (j = 0; j < nformats; j++) { 328 char *petscmatformat = NULL; 329 PetscCall(MapToPetscMatType(matformats[j], use_gpu, &petscmatformat)); 330 PetscCheck(petscmatformat, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Invalid mat format %s, supported options include csr and sell.", matformats[j]); 331 if (flg3) { // mat names specified in a JSON file 332 for (i = 0; i < nmat; i++) { 333 PetscCall(MatCreateFromMTX(&A, filenames[i], PETSC_TRUE)); 334 if (!bflg) { 335 PetscCall(MatCreateVecs(A, &b, NULL)); 336 PetscCall(VecSet(b, 1.0)); 337 } 338 if (use_gpu) PetscCall(WarmUpDevice(A, b, petscmatformat)); 339 PetscCall(TimedSpMV(A, b, NULL, petscmatformat, use_gpu, repetitions)); 340 if (use_gpu) PetscCall(PetscLogSpMVTime(&spmv_times[i], NULL, NULL, petscmatformat)); 341 else PetscCall(PetscLogSpMVTime(NULL, &spmv_times[i], NULL, petscmatformat)); 342 PetscCall(MatDestroy(&A)); 343 if (!bflg) PetscCall(VecDestroy(&b)); 344 } 345 UpdateJSON(jfilename, spmv_times, starting_spmv_time, matformats[j], use_gpu, repetitions); 346 starting_spmv_time = spmv_times[nmat - 1]; 347 } else { 348 PetscReal spmv_time; 349 if (!bflg) { 350 PetscCall(MatCreateVecs(A, &b, NULL)); 351 PetscCall(VecSet(b, 1.0)); 352 } 353 if (use_gpu) PetscCall(WarmUpDevice(A, b, petscmatformat)); 354 PetscCall(TimedSpMV(A, b, &spmv_time, petscmatformat, use_gpu, repetitions)); 355 if (!bflg) PetscCall(VecDestroy(&b)); 356 } 357 PetscCall(PetscFree(petscmatformat)); 358 } 359 if (flg3) { 360 for (i = 0; i < nmat; i++) { 361 free(filenames[i]); 362 free(groupnames[i]); 363 free(matnames[i]); 364 } 365 free(filenames); 366 free(groupnames); 367 free(matnames); 368 PetscCall(PetscFree(spmv_times)); 369 } 370 for (j = 0; j < nformats; j++) PetscCall(PetscFree(matformats[j])); 371 if (flg1 || flg2) PetscCall(MatDestroy(&A)); 372 if (bflg) PetscCall(VecDestroy(&b)); 373 PetscCall(ISDestroy(&rowperm)); 374 PetscCall(ISDestroy(&colperm)); 375 PetscCall(PetscFinalize()); 376 return 0; 377 } 378 /*TEST 379 380 build: 381 requires: !complex double !windows_compilers !defined(PETSC_USE_64BIT_INDICES) 382 depends: mmloader.c mmio.c cJSON.c 383 384 test: 385 suffix: 1 386 args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx 387 output_file: output/empty.out 388 389 test: 390 suffix: 2 391 args:-AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu 392 output_file: output/empty.out 393 requires: cuda 394 395 test: 396 suffix: 3 397 args:-AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu 398 output_file: output/empty.out 399 requires: hip 400 401 TEST*/ 402