1 static char help[] = "Driver for benchmarking SpMV."; 2 3 #include <petscmat.h> 4 #include "cJSON.h" 5 #include "mmloader.h" 6 7 char *read_file(const char *filename) 8 { 9 FILE *file = NULL; 10 long length = 0; 11 char *content = NULL; 12 size_t read_chars = 0; 13 14 /* open in read binary mode */ 15 file = fopen(filename, "rb"); 16 if (file) { 17 /* get the length */ 18 fseek(file, 0, SEEK_END); 19 length = ftell(file); 20 fseek(file, 0, SEEK_SET); 21 /* allocate content buffer */ 22 content = (char *)malloc((size_t)length + sizeof("")); 23 /* read the file into memory */ 24 read_chars = fread(content, sizeof(char), (size_t)length, file); 25 content[read_chars] = '\0'; 26 fclose(file); 27 } 28 return content; 29 } 30 31 void write_file(const char *filename, const char *content) 32 { 33 FILE *file = NULL; 34 file = fopen(filename, "w"); 35 if (file) { fputs(content, file); } 36 fclose(file); 37 } 38 39 int ParseJSON(const char *const inputjsonfile, char ***outputfilenames, char ***outputgroupnames, char ***outputmatnames, int *nmat) 40 { 41 char *content = read_file(inputjsonfile); 42 cJSON *matrix_json = NULL; 43 const cJSON *problem = NULL, *elem = NULL; 44 const cJSON *item = NULL; 45 char **filenames, **groupnames, **matnames; 46 int i, n; 47 if (!content) return 0; 48 matrix_json = cJSON_Parse(content); 49 if (!matrix_json) return 0; 50 n = cJSON_GetArraySize(matrix_json); 51 *nmat = n; 52 filenames = (char **)malloc(sizeof(char *) * n); 53 groupnames = (char **)malloc(sizeof(char *) * n); 54 matnames = (char **)malloc(sizeof(char *) * n); 55 for (i = 0; i < n; i++) { 56 elem = cJSON_GetArrayItem(matrix_json, i); 57 item = cJSON_GetObjectItemCaseSensitive(elem, "filename"); 58 filenames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1)); 59 strcpy(filenames[i], item->valuestring); 60 problem = cJSON_GetObjectItemCaseSensitive(elem, "problem"); 61 item = cJSON_GetObjectItemCaseSensitive(problem, "group"); 62 groupnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1); 63 strcpy(groupnames[i], item->valuestring); 64 item = cJSON_GetObjectItemCaseSensitive(problem, "name"); 65 matnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1); 66 strcpy(matnames[i], item->valuestring); 67 } 68 cJSON_Delete(matrix_json); 69 free(content); 70 *outputfilenames = filenames; 71 *outputgroupnames = groupnames; 72 *outputmatnames = matnames; 73 return 0; 74 } 75 76 int UpdateJSON(const char *const inputjsonfile, PetscReal *spmv_times, PetscReal starting_spmv_time, const char *const matformat, PetscBool use_gpu, PetscInt repetitions) 77 { 78 char *content = read_file(inputjsonfile); 79 cJSON *matrix_json = NULL; 80 cJSON *elem = NULL; 81 int i, n; 82 if (!content) return 0; 83 matrix_json = cJSON_Parse(content); 84 if (!matrix_json) return 0; 85 n = cJSON_GetArraySize(matrix_json); 86 for (i = 0; i < n; i++) { 87 cJSON *spmv = NULL; 88 cJSON *format = NULL; 89 elem = cJSON_GetArrayItem(matrix_json, i); 90 spmv = cJSON_GetObjectItem(elem, "spmv"); 91 if (spmv) { 92 format = cJSON_GetObjectItem(spmv, matformat); 93 if (format) { 94 cJSON_SetNumberValue(cJSON_GetObjectItem(format, "time"), (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions); 95 cJSON_SetIntValue(cJSON_GetObjectItem(format, "repetitions"), repetitions); 96 } else { 97 format = cJSON_CreateObject(); 98 cJSON_AddItemToObject(spmv, matformat, format); 99 cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions); 100 cJSON_AddNumberToObject(format, "repetitions", repetitions); 101 } 102 } else { 103 spmv = cJSON_CreateObject(); 104 cJSON_AddItemToObject(elem, "spmv", spmv); 105 format = cJSON_CreateObject(); 106 cJSON_AddItemToObject(spmv, matformat, format); 107 cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions); 108 cJSON_AddNumberToObject(format, "repetitions", repetitions); 109 } 110 } 111 free(content); 112 content = cJSON_Print(matrix_json); 113 write_file(inputjsonfile, content); 114 cJSON_Delete(matrix_json); 115 free(content); 116 return 0; 117 } 118 119 /* 120 For GPU formats, we keep two copies of the matrix on CPU and one copy on GPU. 121 The extra CPU copy allows us to destroy the GPU matrix and recreate it effciently 122 in each repetition. As a result, each MatMult call is fresh, and we can capture 123 the first-time overhead (e.g. of CuSparse SpMV), and avoids the cache effect 124 during consecutive calls. 125 */ 126 PetscErrorCode TimedSpMV(Mat A, Vec b, PetscReal *time, const char *petscmatformat, PetscBool use_gpu, PetscInt repetitions) 127 { 128 Mat A2 = NULL; 129 PetscInt i; 130 Vec u; 131 PetscLogDouble vstart = 0, vend = 0; 132 PetscBool isaijcusparse, isaijkokkos; 133 134 PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse)); 135 PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos)); 136 if (isaijcusparse) PetscCall(VecSetType(b, VECCUDA)); 137 if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS)); 138 PetscCall(VecDuplicate(b, &u)); 139 if (time) *time = 0.0; 140 for (i = 0; i < repetitions; i++) { 141 if (use_gpu) { 142 PetscCall(MatDestroy(&A2)); 143 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2)); 144 PetscCall(MatConvert(A2, petscmatformat, MAT_INPLACE_MATRIX, &A2)); 145 } else A2 = A; 146 /* Timing MatMult */ 147 if (time) PetscCall(PetscTime(&vstart)); 148 149 PetscCall(MatMult(A2, b, u)); 150 151 if (time) { 152 PetscCall(PetscTime(&vend)); 153 *time += (PetscReal)(vend - vstart); 154 } 155 } 156 PetscCall(VecDestroy(&u)); 157 if (repetitions > 0 && use_gpu) PetscCall(MatDestroy(&A2)); 158 return 0; 159 } 160 161 PetscErrorCode PetscLogSpMVTime(PetscReal *gputime, PetscReal *cputime, PetscReal *gpuflops, const char *petscmatformat) 162 { 163 PetscLogEvent event; 164 PetscEventPerfInfo eventInfo; 165 //PetscReal gpuflopRate; 166 167 // if (matformat) { 168 // PetscCall(PetscLogEventGetId("MatCUDACopyTo", &event)); 169 // } else { 170 // PetscCall(PetscLogEventGetId("MatCUSPARSCopyTo", &event)); 171 // } 172 // PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo)); 173 // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.4e ", eventInfo.time)); 174 175 PetscCall(PetscLogEventGetId("MatMult", &event)); 176 PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo)); 177 //gpuflopRate = eventInfo.GpuFlops/eventInfo.GpuTime; 178 // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.2f %.4e %.4e\n", gpuflopRate/1.e6, eventInfo.GpuTime, eventInfo.time)); 179 if (cputime) *cputime = eventInfo.time; 180 #if defined(PETSC_HAVE_DEVICE) 181 if (gputime) *gputime = eventInfo.GpuTime; 182 if (gpuflops) *gpuflops = eventInfo.GpuFlops / 1.e6; 183 #endif 184 return 0; 185 } 186 187 PetscErrorCode MapToPetscMatType(const char *matformat, PetscBool use_gpu, char **petscmatformat) 188 { 189 PetscBool iscsr, issell, iscsrkokkos; 190 PetscCall(PetscStrcmp(matformat, "csr", &iscsr)); 191 if (iscsr) { 192 if (use_gpu) PetscCall(PetscStrallocpy(MATAIJCUSPARSE, petscmatformat)); 193 else PetscCall(PetscStrallocpy(MATAIJ, petscmatformat)); 194 } else { 195 PetscCall(PetscStrcmp(matformat, "sell", &issell)); 196 if (issell) { 197 if (use_gpu) PetscCall(PetscStrallocpy(MATSELL, petscmatformat)); // placeholder for SELLCUDA 198 else PetscCall(PetscStrallocpy(MATSELL, petscmatformat)); 199 } else { 200 PetscCall(PetscStrcmp(matformat, "csrkokkos", &iscsrkokkos)); 201 if (iscsrkokkos) PetscCall(PetscStrallocpy(MATAIJKOKKOS, petscmatformat)); 202 } 203 } 204 return 0; 205 } 206 207 int main(int argc, char **args) 208 { 209 PetscInt nmat = 1, nformats = 5, i, j, repetitions = 1; 210 Mat A; 211 Vec b; 212 char jfilename[PETSC_MAX_PATH_LEN]; 213 char filename[PETSC_MAX_PATH_LEN], bfilename[PETSC_MAX_PATH_LEN]; 214 char groupname[PETSC_MAX_PATH_LEN], matname[PETSC_MAX_PATH_LEN]; 215 char *matformats[5]; 216 char **filenames = NULL, **groupnames = NULL, **matnames = NULL; 217 char ordering[256] = MATORDERINGRCM; 218 PetscBool bflg, flg1, flg2, flg3, use_gpu = PETSC_FALSE, permute = PETSC_FALSE; 219 IS rowperm = NULL, colperm = NULL; 220 PetscViewer fd; 221 PetscReal starting_spmv_time = 0, *spmv_times; 222 223 PetscCall(PetscOptionsInsertString(NULL, "-log_view_gpu_time -log_view :/dev/null")); 224 PetscCall(PetscInitialize(&argc, &args, (char *)0, help)); 225 PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-formats", matformats, &nformats, &flg1)); 226 if (!flg1) { 227 nformats = 1; 228 PetscCall(PetscStrallocpy("csr", &matformats[0])); 229 } 230 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_gpu", &use_gpu, NULL)); 231 PetscCall(PetscOptionsGetInt(NULL, NULL, "-repetitions", &repetitions, NULL)); 232 /* Read matrix and RHS */ 233 PetscCall(PetscOptionsGetString(NULL, NULL, "-groupname", groupname, PETSC_MAX_PATH_LEN, NULL)); 234 PetscCall(PetscOptionsGetString(NULL, NULL, "-matname", matname, PETSC_MAX_PATH_LEN, NULL)); 235 PetscCall(PetscOptionsGetString(NULL, NULL, "-ABIN", filename, PETSC_MAX_PATH_LEN, &flg1)); 236 PetscCall(PetscOptionsGetString(NULL, NULL, "-AMTX", filename, PETSC_MAX_PATH_LEN, &flg2)); 237 PetscCall(PetscOptionsGetString(NULL, NULL, "-AJSON", jfilename, PETSC_MAX_PATH_LEN, &flg3)); 238 PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Extra options", ""); 239 PetscCall(PetscOptionsFList("-permute", "Permute matrix and vector to solving in new ordering", "", MatOrderingList, ordering, ordering, sizeof(ordering), &permute)); 240 PetscOptionsEnd(); 241 #if !defined(PETSC_HAVE_DEVICE) 242 PetscCheck(!use_gpu, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "To use the option -use_gpu 1, PETSc must be configured with GPU support"); 243 #endif 244 PetscCheck(flg1 || flg2 || flg3, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate an input file with the -ABIN or -AMTX or -AJSON depending on the file format"); 245 if (flg3) { 246 ParseJSON(jfilename, &filenames, &groupnames, &matnames, &nmat); 247 PetscCall(PetscCalloc1(nmat, &spmv_times)); 248 } else if (flg2) { 249 PetscCall(MatCreateFromMTX(&A, filename, PETSC_TRUE)); 250 } else if (flg1) { 251 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_READ, &fd)); 252 PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 253 PetscCall(MatSetType(A, MATAIJ)); 254 PetscCall(MatSetFromOptions(A)); 255 PetscCall(MatLoad(A, fd)); 256 PetscCall(PetscViewerDestroy(&fd)); 257 } 258 if (permute) { 259 Mat Aperm; 260 PetscCall(MatGetOrdering(A, ordering, &rowperm, &colperm)); 261 PetscCall(MatPermute(A, rowperm, colperm, &Aperm)); 262 PetscCall(MatDestroy(&A)); 263 A = Aperm; /* Replace original operator with permuted version */ 264 } 265 /* Let the vec object trigger the first CUDA call, which takes a relatively long time to init CUDA */ 266 PetscCall(PetscOptionsGetString(NULL, NULL, "-b", bfilename, PETSC_MAX_PATH_LEN, &bflg)); 267 if (bflg) { 268 PetscViewer fb; 269 PetscCall(VecCreate(PETSC_COMM_WORLD, &b)); 270 PetscCall(VecSetFromOptions(b)); 271 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, bfilename, FILE_MODE_READ, &fb)); 272 PetscCall(VecLoad(b, fb)); 273 PetscCall(PetscViewerDestroy(&fb)); 274 } 275 276 for (j = 0; j < nformats; j++) { 277 char *petscmatformat = NULL; 278 MapToPetscMatType(matformats[j], use_gpu, &petscmatformat); 279 PetscCheck(petscmatformat, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Invalid mat format %s, supported options include csr and sell.", matformats[j]); 280 if (flg3) { // mat names specified in a JSON file 281 for (i = 0; i < nmat; i++) { 282 PetscCall(MatCreateFromMTX(&A, filenames[i], PETSC_TRUE)); 283 if (!bflg) { 284 PetscCall(MatCreateVecs(A, &b, NULL)); 285 PetscCall(VecSet(b, 1.0)); 286 } 287 PetscCall(TimedSpMV(A, b, NULL, petscmatformat, use_gpu, repetitions)); 288 if (use_gpu) PetscCall(PetscLogSpMVTime(&spmv_times[i], NULL, NULL, petscmatformat)); 289 else PetscCall(PetscLogSpMVTime(NULL, &spmv_times[i], NULL, petscmatformat)); 290 PetscCall(MatDestroy(&A)); 291 if (!bflg) PetscCall(VecDestroy(&b)); 292 } 293 UpdateJSON(jfilename, spmv_times, starting_spmv_time, matformats[j], use_gpu, repetitions); 294 starting_spmv_time = spmv_times[nmat - 1]; 295 } else { 296 PetscReal spmv_time; 297 if (!bflg) { 298 PetscCall(MatCreateVecs(A, &b, NULL)); 299 PetscCall(VecSet(b, 1.0)); 300 } 301 PetscCall(TimedSpMV(A, b, &spmv_time, petscmatformat, use_gpu, repetitions)); 302 if (!bflg) PetscCall(VecDestroy(&b)); 303 } 304 PetscCall(PetscFree(petscmatformat)); 305 } 306 if (flg3) { 307 for (i = 0; i < nmat; i++) { 308 free(filenames[i]); 309 free(groupnames[i]); 310 free(matnames[i]); 311 } 312 free(filenames); 313 free(groupnames); 314 free(matnames); 315 PetscCall(PetscFree(spmv_times)); 316 } 317 for (j = 0; j < nformats; j++) PetscCall(PetscFree(matformats[j])); 318 if (flg1 || flg2) PetscCall(MatDestroy(&A)); 319 if (bflg) PetscCall(VecDestroy(&b)); 320 PetscCall(ISDestroy(&rowperm)); 321 PetscCall(ISDestroy(&colperm)); 322 PetscCall(PetscFinalize()); 323 return 0; 324 } 325 /*TEST 326 327 build: 328 requires: !complex double !windows_compilers !defined(PETSC_USE_64BIT_INDICES) 329 depends: mmloader.c mmio.c cJSON.c 330 331 test: 332 suffix: 1 333 args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx 334 335 test: 336 suffix: 2 337 args:-AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu 338 output_file: output/bench_spmv_1.out 339 requires: cuda 340 341 TEST*/ 342